blob: b013fcf0a5c1e38d6ef0c63cdb4022403281cde5 [file] [log] [blame]
//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//
13
Justin Holewinskiae556d32012-05-04 20:18:50 +000014#include "NVPTXISelDAGToDAG.h"
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +000015#include "NVPTXUtilities.h"
Jingyue Wu48a9bdc2015-07-20 21:28:54 +000016#include "llvm/Analysis/ValueTracking.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000017#include "llvm/IR/GlobalValue.h"
18#include "llvm/IR/Instructions.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000019#include "llvm/Support/CommandLine.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000020#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000022#include "llvm/Support/raw_ostream.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000023#include "llvm/Target/TargetIntrinsicInfo.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000024
Justin Holewinskiae556d32012-05-04 20:18:50 +000025using namespace llvm;
26
Chandler Carruth84e68b22014-04-22 02:41:26 +000027#define DEBUG_TYPE "nvptx-isel"
28
Justin Holewinskiae556d32012-05-04 20:18:50 +000029/// createNVPTXISelDag - This pass converts a legalized DAG into a
30/// NVPTX-specific DAG, ready for instruction scheduling.
31FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
32 llvm::CodeGenOpt::Level OptLevel) {
33 return new NVPTXDAGToDAGISel(TM, OptLevel);
34}
35
Justin Holewinskiae556d32012-05-04 20:18:50 +000036NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
37 CodeGenOpt::Level OptLevel)
Eric Christopher02389e32015-02-19 00:08:27 +000038 : SelectionDAGISel(tm, OptLevel), TM(tm) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000039 doMulWide = (OptLevel > 0);
Justin Holewinskicd069e62013-07-22 12:18:04 +000040}
Justin Holewinskiae556d32012-05-04 20:18:50 +000041
Eric Christopher147bba22015-01-30 01:40:59 +000042bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
Justin Lebar077f8fb2017-01-21 01:00:14 +000043 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
44 return SelectionDAGISel::runOnMachineFunction(MF);
Eric Christopher147bba22015-01-30 01:40:59 +000045}
46
Justin Holewinskicd069e62013-07-22 12:18:04 +000047int NVPTXDAGToDAGISel::getDivF32Level() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000048 return Subtarget->getTargetLowering()->getDivF32Level();
Justin Holewinskicd069e62013-07-22 12:18:04 +000049}
Justin Holewinskiae556d32012-05-04 20:18:50 +000050
Justin Holewinskicd069e62013-07-22 12:18:04 +000051bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000052 return Subtarget->getTargetLowering()->usePrecSqrtF32();
Justin Holewinskicd069e62013-07-22 12:18:04 +000053}
54
55bool NVPTXDAGToDAGISel::useF32FTZ() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000056 return Subtarget->getTargetLowering()->useF32FTZ(*MF);
Justin Holewinskiae556d32012-05-04 20:18:50 +000057}
58
Justin Holewinski428cf0e2014-07-17 18:10:09 +000059bool NVPTXDAGToDAGISel::allowFMA() const {
Eric Christopher147bba22015-01-30 01:40:59 +000060 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
Justin Holewinski428cf0e2014-07-17 18:10:09 +000061 return TL->allowFMA(*MF, OptLevel);
62}
63
Artem Belevichd109f462017-01-13 18:48:13 +000064bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
65 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
66 return TL->allowUnsafeFPMath(*MF);
67}
68
Justin Holewinskiae556d32012-05-04 20:18:50 +000069/// Select - Select instructions not customized! Used for
70/// expanded, promoted and normal instructions.
Justin Bogner8d83fb62016-05-13 21:12:53 +000071void NVPTXDAGToDAGISel::Select(SDNode *N) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000072
Tim Northover31d093c2013-09-22 08:21:56 +000073 if (N->isMachineOpcode()) {
74 N->setNodeId(-1);
Justin Bogner8d83fb62016-05-13 21:12:53 +000075 return; // Already selected.
Tim Northover31d093c2013-09-22 08:21:56 +000076 }
Justin Holewinskiae556d32012-05-04 20:18:50 +000077
Justin Holewinskiae556d32012-05-04 20:18:50 +000078 switch (N->getOpcode()) {
79 case ISD::LOAD:
Justin Bogner8d83fb62016-05-13 21:12:53 +000080 if (tryLoad(N))
81 return;
Justin Holewinskiae556d32012-05-04 20:18:50 +000082 break;
83 case ISD::STORE:
Justin Bogner8d83fb62016-05-13 21:12:53 +000084 if (tryStore(N))
85 return;
Justin Holewinskiae556d32012-05-04 20:18:50 +000086 break;
Artem Belevich620db1f2017-02-23 22:38:24 +000087 case ISD::EXTRACT_VECTOR_ELT:
88 if (tryEXTRACT_VECTOR_ELEMENT(N))
89 return;
90 break;
91 case NVPTXISD::SETP_F16X2:
92 SelectSETP_F16X2(N);
93 return;
94
Justin Holewinskibe8dc642013-02-12 14:18:49 +000095 case NVPTXISD::LoadV2:
96 case NVPTXISD::LoadV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +000097 if (tryLoadVector(N))
98 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +000099 break;
100 case NVPTXISD::LDGV2:
101 case NVPTXISD::LDGV4:
102 case NVPTXISD::LDUV2:
103 case NVPTXISD::LDUV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000104 if (tryLDGLDU(N))
105 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000106 break;
107 case NVPTXISD::StoreV2:
108 case NVPTXISD::StoreV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000109 if (tryStoreVector(N))
110 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000111 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000112 case NVPTXISD::LoadParam:
113 case NVPTXISD::LoadParamV2:
114 case NVPTXISD::LoadParamV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000115 if (tryLoadParam(N))
116 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000117 break;
118 case NVPTXISD::StoreRetval:
119 case NVPTXISD::StoreRetvalV2:
120 case NVPTXISD::StoreRetvalV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000121 if (tryStoreRetval(N))
122 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000123 break;
124 case NVPTXISD::StoreParam:
125 case NVPTXISD::StoreParamV2:
126 case NVPTXISD::StoreParamV4:
127 case NVPTXISD::StoreParamS32:
128 case NVPTXISD::StoreParamU32:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000129 if (tryStoreParam(N))
130 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000131 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000132 case ISD::INTRINSIC_WO_CHAIN:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000133 if (tryIntrinsicNoChain(N))
134 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000135 break;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000136 case ISD::INTRINSIC_W_CHAIN:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000137 if (tryIntrinsicChain(N))
138 return;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000139 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000140 case NVPTXISD::Tex1DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000141 case NVPTXISD::Tex1DFloatFloat:
142 case NVPTXISD::Tex1DFloatFloatLevel:
143 case NVPTXISD::Tex1DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000144 case NVPTXISD::Tex1DS32S32:
145 case NVPTXISD::Tex1DS32Float:
146 case NVPTXISD::Tex1DS32FloatLevel:
147 case NVPTXISD::Tex1DS32FloatGrad:
148 case NVPTXISD::Tex1DU32S32:
149 case NVPTXISD::Tex1DU32Float:
150 case NVPTXISD::Tex1DU32FloatLevel:
151 case NVPTXISD::Tex1DU32FloatGrad:
152 case NVPTXISD::Tex1DArrayFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000153 case NVPTXISD::Tex1DArrayFloatFloat:
154 case NVPTXISD::Tex1DArrayFloatFloatLevel:
155 case NVPTXISD::Tex1DArrayFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000156 case NVPTXISD::Tex1DArrayS32S32:
157 case NVPTXISD::Tex1DArrayS32Float:
158 case NVPTXISD::Tex1DArrayS32FloatLevel:
159 case NVPTXISD::Tex1DArrayS32FloatGrad:
160 case NVPTXISD::Tex1DArrayU32S32:
161 case NVPTXISD::Tex1DArrayU32Float:
162 case NVPTXISD::Tex1DArrayU32FloatLevel:
163 case NVPTXISD::Tex1DArrayU32FloatGrad:
164 case NVPTXISD::Tex2DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000165 case NVPTXISD::Tex2DFloatFloat:
166 case NVPTXISD::Tex2DFloatFloatLevel:
167 case NVPTXISD::Tex2DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000168 case NVPTXISD::Tex2DS32S32:
169 case NVPTXISD::Tex2DS32Float:
170 case NVPTXISD::Tex2DS32FloatLevel:
171 case NVPTXISD::Tex2DS32FloatGrad:
172 case NVPTXISD::Tex2DU32S32:
173 case NVPTXISD::Tex2DU32Float:
174 case NVPTXISD::Tex2DU32FloatLevel:
175 case NVPTXISD::Tex2DU32FloatGrad:
176 case NVPTXISD::Tex2DArrayFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000177 case NVPTXISD::Tex2DArrayFloatFloat:
178 case NVPTXISD::Tex2DArrayFloatFloatLevel:
179 case NVPTXISD::Tex2DArrayFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000180 case NVPTXISD::Tex2DArrayS32S32:
181 case NVPTXISD::Tex2DArrayS32Float:
182 case NVPTXISD::Tex2DArrayS32FloatLevel:
183 case NVPTXISD::Tex2DArrayS32FloatGrad:
184 case NVPTXISD::Tex2DArrayU32S32:
185 case NVPTXISD::Tex2DArrayU32Float:
186 case NVPTXISD::Tex2DArrayU32FloatLevel:
187 case NVPTXISD::Tex2DArrayU32FloatGrad:
188 case NVPTXISD::Tex3DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000189 case NVPTXISD::Tex3DFloatFloat:
190 case NVPTXISD::Tex3DFloatFloatLevel:
191 case NVPTXISD::Tex3DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000192 case NVPTXISD::Tex3DS32S32:
193 case NVPTXISD::Tex3DS32Float:
194 case NVPTXISD::Tex3DS32FloatLevel:
195 case NVPTXISD::Tex3DS32FloatGrad:
196 case NVPTXISD::Tex3DU32S32:
197 case NVPTXISD::Tex3DU32Float:
198 case NVPTXISD::Tex3DU32FloatLevel:
199 case NVPTXISD::Tex3DU32FloatGrad:
200 case NVPTXISD::TexCubeFloatFloat:
201 case NVPTXISD::TexCubeFloatFloatLevel:
202 case NVPTXISD::TexCubeS32Float:
203 case NVPTXISD::TexCubeS32FloatLevel:
204 case NVPTXISD::TexCubeU32Float:
205 case NVPTXISD::TexCubeU32FloatLevel:
206 case NVPTXISD::TexCubeArrayFloatFloat:
207 case NVPTXISD::TexCubeArrayFloatFloatLevel:
208 case NVPTXISD::TexCubeArrayS32Float:
209 case NVPTXISD::TexCubeArrayS32FloatLevel:
210 case NVPTXISD::TexCubeArrayU32Float:
211 case NVPTXISD::TexCubeArrayU32FloatLevel:
212 case NVPTXISD::Tld4R2DFloatFloat:
213 case NVPTXISD::Tld4G2DFloatFloat:
214 case NVPTXISD::Tld4B2DFloatFloat:
215 case NVPTXISD::Tld4A2DFloatFloat:
216 case NVPTXISD::Tld4R2DS64Float:
217 case NVPTXISD::Tld4G2DS64Float:
218 case NVPTXISD::Tld4B2DS64Float:
219 case NVPTXISD::Tld4A2DS64Float:
220 case NVPTXISD::Tld4R2DU64Float:
221 case NVPTXISD::Tld4G2DU64Float:
222 case NVPTXISD::Tld4B2DU64Float:
223 case NVPTXISD::Tld4A2DU64Float:
224 case NVPTXISD::TexUnified1DFloatS32:
225 case NVPTXISD::TexUnified1DFloatFloat:
226 case NVPTXISD::TexUnified1DFloatFloatLevel:
227 case NVPTXISD::TexUnified1DFloatFloatGrad:
228 case NVPTXISD::TexUnified1DS32S32:
229 case NVPTXISD::TexUnified1DS32Float:
230 case NVPTXISD::TexUnified1DS32FloatLevel:
231 case NVPTXISD::TexUnified1DS32FloatGrad:
232 case NVPTXISD::TexUnified1DU32S32:
233 case NVPTXISD::TexUnified1DU32Float:
234 case NVPTXISD::TexUnified1DU32FloatLevel:
235 case NVPTXISD::TexUnified1DU32FloatGrad:
236 case NVPTXISD::TexUnified1DArrayFloatS32:
237 case NVPTXISD::TexUnified1DArrayFloatFloat:
238 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
239 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
240 case NVPTXISD::TexUnified1DArrayS32S32:
241 case NVPTXISD::TexUnified1DArrayS32Float:
242 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
243 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
244 case NVPTXISD::TexUnified1DArrayU32S32:
245 case NVPTXISD::TexUnified1DArrayU32Float:
246 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
247 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
248 case NVPTXISD::TexUnified2DFloatS32:
249 case NVPTXISD::TexUnified2DFloatFloat:
250 case NVPTXISD::TexUnified2DFloatFloatLevel:
251 case NVPTXISD::TexUnified2DFloatFloatGrad:
252 case NVPTXISD::TexUnified2DS32S32:
253 case NVPTXISD::TexUnified2DS32Float:
254 case NVPTXISD::TexUnified2DS32FloatLevel:
255 case NVPTXISD::TexUnified2DS32FloatGrad:
256 case NVPTXISD::TexUnified2DU32S32:
257 case NVPTXISD::TexUnified2DU32Float:
258 case NVPTXISD::TexUnified2DU32FloatLevel:
259 case NVPTXISD::TexUnified2DU32FloatGrad:
260 case NVPTXISD::TexUnified2DArrayFloatS32:
261 case NVPTXISD::TexUnified2DArrayFloatFloat:
262 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
263 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
264 case NVPTXISD::TexUnified2DArrayS32S32:
265 case NVPTXISD::TexUnified2DArrayS32Float:
266 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
267 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
268 case NVPTXISD::TexUnified2DArrayU32S32:
269 case NVPTXISD::TexUnified2DArrayU32Float:
270 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
271 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
272 case NVPTXISD::TexUnified3DFloatS32:
273 case NVPTXISD::TexUnified3DFloatFloat:
274 case NVPTXISD::TexUnified3DFloatFloatLevel:
275 case NVPTXISD::TexUnified3DFloatFloatGrad:
276 case NVPTXISD::TexUnified3DS32S32:
277 case NVPTXISD::TexUnified3DS32Float:
278 case NVPTXISD::TexUnified3DS32FloatLevel:
279 case NVPTXISD::TexUnified3DS32FloatGrad:
280 case NVPTXISD::TexUnified3DU32S32:
281 case NVPTXISD::TexUnified3DU32Float:
282 case NVPTXISD::TexUnified3DU32FloatLevel:
283 case NVPTXISD::TexUnified3DU32FloatGrad:
284 case NVPTXISD::TexUnifiedCubeFloatFloat:
285 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
286 case NVPTXISD::TexUnifiedCubeS32Float:
287 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
288 case NVPTXISD::TexUnifiedCubeU32Float:
289 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
290 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
291 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
292 case NVPTXISD::TexUnifiedCubeArrayS32Float:
293 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
294 case NVPTXISD::TexUnifiedCubeArrayU32Float:
295 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
296 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
297 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
298 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
299 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
300 case NVPTXISD::Tld4UnifiedR2DS64Float:
301 case NVPTXISD::Tld4UnifiedG2DS64Float:
302 case NVPTXISD::Tld4UnifiedB2DS64Float:
303 case NVPTXISD::Tld4UnifiedA2DS64Float:
304 case NVPTXISD::Tld4UnifiedR2DU64Float:
305 case NVPTXISD::Tld4UnifiedG2DU64Float:
306 case NVPTXISD::Tld4UnifiedB2DU64Float:
307 case NVPTXISD::Tld4UnifiedA2DU64Float:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000308 if (tryTextureIntrinsic(N))
309 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000310 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000311 case NVPTXISD::Suld1DI8Clamp:
312 case NVPTXISD::Suld1DI16Clamp:
313 case NVPTXISD::Suld1DI32Clamp:
314 case NVPTXISD::Suld1DI64Clamp:
315 case NVPTXISD::Suld1DV2I8Clamp:
316 case NVPTXISD::Suld1DV2I16Clamp:
317 case NVPTXISD::Suld1DV2I32Clamp:
318 case NVPTXISD::Suld1DV2I64Clamp:
319 case NVPTXISD::Suld1DV4I8Clamp:
320 case NVPTXISD::Suld1DV4I16Clamp:
321 case NVPTXISD::Suld1DV4I32Clamp:
322 case NVPTXISD::Suld1DArrayI8Clamp:
323 case NVPTXISD::Suld1DArrayI16Clamp:
324 case NVPTXISD::Suld1DArrayI32Clamp:
325 case NVPTXISD::Suld1DArrayI64Clamp:
326 case NVPTXISD::Suld1DArrayV2I8Clamp:
327 case NVPTXISD::Suld1DArrayV2I16Clamp:
328 case NVPTXISD::Suld1DArrayV2I32Clamp:
329 case NVPTXISD::Suld1DArrayV2I64Clamp:
330 case NVPTXISD::Suld1DArrayV4I8Clamp:
331 case NVPTXISD::Suld1DArrayV4I16Clamp:
332 case NVPTXISD::Suld1DArrayV4I32Clamp:
333 case NVPTXISD::Suld2DI8Clamp:
334 case NVPTXISD::Suld2DI16Clamp:
335 case NVPTXISD::Suld2DI32Clamp:
336 case NVPTXISD::Suld2DI64Clamp:
337 case NVPTXISD::Suld2DV2I8Clamp:
338 case NVPTXISD::Suld2DV2I16Clamp:
339 case NVPTXISD::Suld2DV2I32Clamp:
340 case NVPTXISD::Suld2DV2I64Clamp:
341 case NVPTXISD::Suld2DV4I8Clamp:
342 case NVPTXISD::Suld2DV4I16Clamp:
343 case NVPTXISD::Suld2DV4I32Clamp:
344 case NVPTXISD::Suld2DArrayI8Clamp:
345 case NVPTXISD::Suld2DArrayI16Clamp:
346 case NVPTXISD::Suld2DArrayI32Clamp:
347 case NVPTXISD::Suld2DArrayI64Clamp:
348 case NVPTXISD::Suld2DArrayV2I8Clamp:
349 case NVPTXISD::Suld2DArrayV2I16Clamp:
350 case NVPTXISD::Suld2DArrayV2I32Clamp:
351 case NVPTXISD::Suld2DArrayV2I64Clamp:
352 case NVPTXISD::Suld2DArrayV4I8Clamp:
353 case NVPTXISD::Suld2DArrayV4I16Clamp:
354 case NVPTXISD::Suld2DArrayV4I32Clamp:
355 case NVPTXISD::Suld3DI8Clamp:
356 case NVPTXISD::Suld3DI16Clamp:
357 case NVPTXISD::Suld3DI32Clamp:
358 case NVPTXISD::Suld3DI64Clamp:
359 case NVPTXISD::Suld3DV2I8Clamp:
360 case NVPTXISD::Suld3DV2I16Clamp:
361 case NVPTXISD::Suld3DV2I32Clamp:
362 case NVPTXISD::Suld3DV2I64Clamp:
363 case NVPTXISD::Suld3DV4I8Clamp:
364 case NVPTXISD::Suld3DV4I16Clamp:
365 case NVPTXISD::Suld3DV4I32Clamp:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000366 case NVPTXISD::Suld1DI8Trap:
367 case NVPTXISD::Suld1DI16Trap:
368 case NVPTXISD::Suld1DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000369 case NVPTXISD::Suld1DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000370 case NVPTXISD::Suld1DV2I8Trap:
371 case NVPTXISD::Suld1DV2I16Trap:
372 case NVPTXISD::Suld1DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000373 case NVPTXISD::Suld1DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000374 case NVPTXISD::Suld1DV4I8Trap:
375 case NVPTXISD::Suld1DV4I16Trap:
376 case NVPTXISD::Suld1DV4I32Trap:
377 case NVPTXISD::Suld1DArrayI8Trap:
378 case NVPTXISD::Suld1DArrayI16Trap:
379 case NVPTXISD::Suld1DArrayI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000380 case NVPTXISD::Suld1DArrayI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000381 case NVPTXISD::Suld1DArrayV2I8Trap:
382 case NVPTXISD::Suld1DArrayV2I16Trap:
383 case NVPTXISD::Suld1DArrayV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000384 case NVPTXISD::Suld1DArrayV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000385 case NVPTXISD::Suld1DArrayV4I8Trap:
386 case NVPTXISD::Suld1DArrayV4I16Trap:
387 case NVPTXISD::Suld1DArrayV4I32Trap:
388 case NVPTXISD::Suld2DI8Trap:
389 case NVPTXISD::Suld2DI16Trap:
390 case NVPTXISD::Suld2DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000391 case NVPTXISD::Suld2DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000392 case NVPTXISD::Suld2DV2I8Trap:
393 case NVPTXISD::Suld2DV2I16Trap:
394 case NVPTXISD::Suld2DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000395 case NVPTXISD::Suld2DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000396 case NVPTXISD::Suld2DV4I8Trap:
397 case NVPTXISD::Suld2DV4I16Trap:
398 case NVPTXISD::Suld2DV4I32Trap:
399 case NVPTXISD::Suld2DArrayI8Trap:
400 case NVPTXISD::Suld2DArrayI16Trap:
401 case NVPTXISD::Suld2DArrayI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000402 case NVPTXISD::Suld2DArrayI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000403 case NVPTXISD::Suld2DArrayV2I8Trap:
404 case NVPTXISD::Suld2DArrayV2I16Trap:
405 case NVPTXISD::Suld2DArrayV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000406 case NVPTXISD::Suld2DArrayV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000407 case NVPTXISD::Suld2DArrayV4I8Trap:
408 case NVPTXISD::Suld2DArrayV4I16Trap:
409 case NVPTXISD::Suld2DArrayV4I32Trap:
410 case NVPTXISD::Suld3DI8Trap:
411 case NVPTXISD::Suld3DI16Trap:
412 case NVPTXISD::Suld3DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000413 case NVPTXISD::Suld3DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000414 case NVPTXISD::Suld3DV2I8Trap:
415 case NVPTXISD::Suld3DV2I16Trap:
416 case NVPTXISD::Suld3DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000417 case NVPTXISD::Suld3DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000418 case NVPTXISD::Suld3DV4I8Trap:
419 case NVPTXISD::Suld3DV4I16Trap:
420 case NVPTXISD::Suld3DV4I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000421 case NVPTXISD::Suld1DI8Zero:
422 case NVPTXISD::Suld1DI16Zero:
423 case NVPTXISD::Suld1DI32Zero:
424 case NVPTXISD::Suld1DI64Zero:
425 case NVPTXISD::Suld1DV2I8Zero:
426 case NVPTXISD::Suld1DV2I16Zero:
427 case NVPTXISD::Suld1DV2I32Zero:
428 case NVPTXISD::Suld1DV2I64Zero:
429 case NVPTXISD::Suld1DV4I8Zero:
430 case NVPTXISD::Suld1DV4I16Zero:
431 case NVPTXISD::Suld1DV4I32Zero:
432 case NVPTXISD::Suld1DArrayI8Zero:
433 case NVPTXISD::Suld1DArrayI16Zero:
434 case NVPTXISD::Suld1DArrayI32Zero:
435 case NVPTXISD::Suld1DArrayI64Zero:
436 case NVPTXISD::Suld1DArrayV2I8Zero:
437 case NVPTXISD::Suld1DArrayV2I16Zero:
438 case NVPTXISD::Suld1DArrayV2I32Zero:
439 case NVPTXISD::Suld1DArrayV2I64Zero:
440 case NVPTXISD::Suld1DArrayV4I8Zero:
441 case NVPTXISD::Suld1DArrayV4I16Zero:
442 case NVPTXISD::Suld1DArrayV4I32Zero:
443 case NVPTXISD::Suld2DI8Zero:
444 case NVPTXISD::Suld2DI16Zero:
445 case NVPTXISD::Suld2DI32Zero:
446 case NVPTXISD::Suld2DI64Zero:
447 case NVPTXISD::Suld2DV2I8Zero:
448 case NVPTXISD::Suld2DV2I16Zero:
449 case NVPTXISD::Suld2DV2I32Zero:
450 case NVPTXISD::Suld2DV2I64Zero:
451 case NVPTXISD::Suld2DV4I8Zero:
452 case NVPTXISD::Suld2DV4I16Zero:
453 case NVPTXISD::Suld2DV4I32Zero:
454 case NVPTXISD::Suld2DArrayI8Zero:
455 case NVPTXISD::Suld2DArrayI16Zero:
456 case NVPTXISD::Suld2DArrayI32Zero:
457 case NVPTXISD::Suld2DArrayI64Zero:
458 case NVPTXISD::Suld2DArrayV2I8Zero:
459 case NVPTXISD::Suld2DArrayV2I16Zero:
460 case NVPTXISD::Suld2DArrayV2I32Zero:
461 case NVPTXISD::Suld2DArrayV2I64Zero:
462 case NVPTXISD::Suld2DArrayV4I8Zero:
463 case NVPTXISD::Suld2DArrayV4I16Zero:
464 case NVPTXISD::Suld2DArrayV4I32Zero:
465 case NVPTXISD::Suld3DI8Zero:
466 case NVPTXISD::Suld3DI16Zero:
467 case NVPTXISD::Suld3DI32Zero:
468 case NVPTXISD::Suld3DI64Zero:
469 case NVPTXISD::Suld3DV2I8Zero:
470 case NVPTXISD::Suld3DV2I16Zero:
471 case NVPTXISD::Suld3DV2I32Zero:
472 case NVPTXISD::Suld3DV2I64Zero:
473 case NVPTXISD::Suld3DV4I8Zero:
474 case NVPTXISD::Suld3DV4I16Zero:
475 case NVPTXISD::Suld3DV4I32Zero:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000476 if (trySurfaceIntrinsic(N))
477 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000478 break;
Justin Holewinskica7a4f12014-06-27 18:35:27 +0000479 case ISD::AND:
480 case ISD::SRA:
481 case ISD::SRL:
482 // Try to select BFE
Justin Bogner8d83fb62016-05-13 21:12:53 +0000483 if (tryBFE(N))
484 return;
Justin Holewinskica7a4f12014-06-27 18:35:27 +0000485 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000486 case ISD::ADDRSPACECAST:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000487 SelectAddrSpaceCast(N);
488 return;
Artem Belevich64dc9be2017-01-13 20:56:17 +0000489 case ISD::ConstantFP:
490 if (tryConstantFP16(N))
491 return;
492 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000493 default:
494 break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000495 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000496 SelectCode(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000497}
498
// Each WMMA load/store instruction has four addressing variants. The
// enumerators below are used directly as indices into the four-element table
// that WMMA_VARIANTS() builds, and getWmmaLdVariant() uses that table to look
// up the entry for a particular variant.
enum WmmaVariant {
  WMMA_VARIANT_ARI64 = 0,
  WMMA_VARIANT_ARI64_STRIDE = 1,
  WMMA_VARIANT_AVAR = 2,
  WMMA_VARIANT_AVAR_STRIDE = 3,
};

// Expands to a braced initializer listing base##_ari64, base##_ari64_stride,
// base##_avar and base##_avar_stride, i.e. in WmmaVariant order.
// clang-format off
#define WMMA_VARIANTS(base) \
  {{ base##_ari64, base##_ari64_stride, base##_avar, base##_avar_stride }}
// clang-format on

/// Pick the entry of \p Variants that corresponds to \p Variant.
///
/// When \p Stride is set, the two non-strided variants are first promoted to
/// their *_STRIDE counterparts; variants that are already strided are used
/// unchanged. When \p Stride is clear, \p Variant indexes the table as given.
static unsigned getWmmaLdVariant(WmmaVariant Variant, bool Stride,
                                 const std::array<unsigned, 4> Variants) {
  if (!Stride)
    return Variants[Variant];

  switch (Variant) {
  case WMMA_VARIANT_ARI64:
    return Variants[WMMA_VARIANT_ARI64_STRIDE];
  case WMMA_VARIANT_AVAR:
    return Variants[WMMA_VARIANT_AVAR_STRIDE];
  default:
    // Already a strided variant: nothing to promote.
    return Variants[Variant];
  }
}
524
525static Optional<unsigned>
526getWmmaLdStOpcode(unsigned IntrinsicID,
527 WmmaVariant Variant = WMMA_VARIANT_ARI64) {
528 switch (IntrinsicID) {
529 default:
530 return None;
531 //
532 // WMMA_LOAD_A f16
533 //
534 case Intrinsic::nvvm_wmma_load_a_f16_col:
535 return getWmmaLdVariant(Variant, /*Stride=*/false,
536 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_col));
537 case Intrinsic::nvvm_wmma_load_a_f16_row:
538 return getWmmaLdVariant(Variant, /*Stride=*/false,
539 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_row));
540 case Intrinsic::nvvm_wmma_load_a_f16_col_stride:
541 return getWmmaLdVariant(Variant, /*Stride=*/true,
542 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_col));
543 case Intrinsic::nvvm_wmma_load_a_f16_row_stride:
544 return getWmmaLdVariant(Variant, /*Stride=*/true,
545 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_row));
546 case Intrinsic::nvvm_wmma_load_a_f16_col_shared:
547 return getWmmaLdVariant(Variant, /*Stride=*/false,
548 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_col_shared));
549 case Intrinsic::nvvm_wmma_load_a_f16_row_shared:
550 return getWmmaLdVariant(Variant, /*Stride=*/false,
551 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_row_shared));
552 case Intrinsic::nvvm_wmma_load_a_f16_col_shared_stride:
553 return getWmmaLdVariant(Variant, /*Stride=*/true,
554 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_col_shared));
555 case Intrinsic::nvvm_wmma_load_a_f16_row_shared_stride:
556 return getWmmaLdVariant(Variant, /*Stride=*/true,
557 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_row_shared));
558 case Intrinsic::nvvm_wmma_load_a_f16_col_global:
559 return getWmmaLdVariant(Variant, /*Stride=*/false,
560 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_col_global));
561 case Intrinsic::nvvm_wmma_load_a_f16_row_global:
562 return getWmmaLdVariant(Variant, /*Stride=*/false,
563 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_row_global));
564 case Intrinsic::nvvm_wmma_load_a_f16_col_global_stride:
565 return getWmmaLdVariant(Variant, /*Stride=*/true,
566 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_col_global));
567 case Intrinsic::nvvm_wmma_load_a_f16_row_global_stride:
568 return getWmmaLdVariant(Variant, /*Stride=*/true,
569 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_row_global));
570
571 //
572 // WMMA_LOAD_B f16
573 //
574 case Intrinsic::nvvm_wmma_load_b_f16_col:
575 return getWmmaLdVariant(Variant, /*Stride=*/false,
576 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_col));
577 case Intrinsic::nvvm_wmma_load_b_f16_row:
578 return getWmmaLdVariant(Variant, /*Stride=*/false,
579 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_row));
580 case Intrinsic::nvvm_wmma_load_b_f16_col_stride:
581 return getWmmaLdVariant(Variant, /*Stride=*/true,
582 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_col));
583 case Intrinsic::nvvm_wmma_load_b_f16_row_stride:
584 return getWmmaLdVariant(Variant, /*Stride=*/true,
585 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_row));
586 case Intrinsic::nvvm_wmma_load_b_f16_col_shared:
587 return getWmmaLdVariant(Variant, /*Stride=*/false,
588 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_col_shared));
589 case Intrinsic::nvvm_wmma_load_b_f16_row_shared:
590 return getWmmaLdVariant(Variant, /*Stride=*/false,
591 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_row_shared));
592 case Intrinsic::nvvm_wmma_load_b_f16_col_shared_stride:
593 return getWmmaLdVariant(Variant, /*Stride=*/true,
594 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_col_shared));
595 case Intrinsic::nvvm_wmma_load_b_f16_row_shared_stride:
596 return getWmmaLdVariant(Variant, /*Stride=*/true,
597 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_row_shared));
598 case Intrinsic::nvvm_wmma_load_b_f16_col_global:
599 return getWmmaLdVariant(Variant, /*Stride=*/false,
600 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_col_global));
601 case Intrinsic::nvvm_wmma_load_b_f16_row_global:
602 return getWmmaLdVariant(Variant, /*Stride=*/false,
603 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_row_global));
604 case Intrinsic::nvvm_wmma_load_b_f16_col_global_stride:
605 return getWmmaLdVariant(Variant, /*Stride=*/true,
606 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_col_global));
607 case Intrinsic::nvvm_wmma_load_b_f16_row_global_stride:
608 return getWmmaLdVariant(Variant, /*Stride=*/true,
609 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_row_global));
610
611 //
612 // WMMA_LOAD_C f16
613 //
614 case Intrinsic::nvvm_wmma_load_c_f16_col:
615 return getWmmaLdVariant(Variant, /*Stride=*/false,
616 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_col));
617 case Intrinsic::nvvm_wmma_load_c_f16_row:
618 return getWmmaLdVariant(Variant, /*Stride=*/false,
619 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_row));
620 case Intrinsic::nvvm_wmma_load_c_f16_col_stride:
621 return getWmmaLdVariant(Variant, /*Stride=*/true,
622 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_col));
623 case Intrinsic::nvvm_wmma_load_c_f16_row_stride:
624 return getWmmaLdVariant(Variant, /*Stride=*/true,
625 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_row));
626 case Intrinsic::nvvm_wmma_load_c_f16_col_shared:
627 return getWmmaLdVariant(
628 Variant, /*Stride=*/false,
629 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_col_shared));
630 case Intrinsic::nvvm_wmma_load_c_f16_row_shared:
631 return getWmmaLdVariant(
632 Variant, /*Stride=*/false,
633 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_row_shared));
634 case Intrinsic::nvvm_wmma_load_c_f16_col_shared_stride:
635 return getWmmaLdVariant(
636 Variant, /*Stride=*/true,
637 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_col_shared));
638 case Intrinsic::nvvm_wmma_load_c_f16_row_shared_stride:
639 return getWmmaLdVariant(
640 Variant, /*Stride=*/true,
641 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_row_shared));
642 case Intrinsic::nvvm_wmma_load_c_f16_col_global:
643 return getWmmaLdVariant(
644 Variant, /*Stride=*/false,
645 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_col_global));
646 case Intrinsic::nvvm_wmma_load_c_f16_row_global:
647 return getWmmaLdVariant(
648 Variant, /*Stride=*/false,
649 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_row_global));
650 case Intrinsic::nvvm_wmma_load_c_f16_col_global_stride:
651 return getWmmaLdVariant(
652 Variant, /*Stride=*/true,
653 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_col_global));
654 case Intrinsic::nvvm_wmma_load_c_f16_row_global_stride:
655 return getWmmaLdVariant(
656 Variant, /*Stride=*/true,
657 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_row_global));
658
659 //
660 // WMMA_LOAD_C f32
661 //
662 case Intrinsic::nvvm_wmma_load_c_f32_col:
663 return getWmmaLdVariant(Variant, /*Stride=*/false,
664 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_col));
665 case Intrinsic::nvvm_wmma_load_c_f32_row:
666 return getWmmaLdVariant(Variant, /*Stride=*/false,
667 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_row));
668 case Intrinsic::nvvm_wmma_load_c_f32_col_stride:
669 return getWmmaLdVariant(Variant, /*Stride=*/true,
670 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_col));
671 case Intrinsic::nvvm_wmma_load_c_f32_row_stride:
672 return getWmmaLdVariant(Variant, /*Stride=*/true,
673 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_row));
674 case Intrinsic::nvvm_wmma_load_c_f32_col_shared:
675 return getWmmaLdVariant(
676 Variant, /*Stride=*/false,
677 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_col_shared));
678 case Intrinsic::nvvm_wmma_load_c_f32_row_shared:
679 return getWmmaLdVariant(
680 Variant, /*Stride=*/false,
681 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_row_shared));
682 case Intrinsic::nvvm_wmma_load_c_f32_col_shared_stride:
683 return getWmmaLdVariant(
684 Variant, /*Stride=*/true,
685 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_col_shared));
686 case Intrinsic::nvvm_wmma_load_c_f32_row_shared_stride:
687 return getWmmaLdVariant(
688 Variant, /*Stride=*/true,
689 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_row_shared));
690 case Intrinsic::nvvm_wmma_load_c_f32_col_global:
691 return getWmmaLdVariant(
692 Variant, /*Stride=*/false,
693 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_col_global));
694 case Intrinsic::nvvm_wmma_load_c_f32_row_global:
695 return getWmmaLdVariant(
696 Variant, /*Stride=*/false,
697 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_row_global));
698 case Intrinsic::nvvm_wmma_load_c_f32_col_global_stride:
699 return getWmmaLdVariant(
700 Variant, /*Stride=*/true,
701 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_col_global));
702 case Intrinsic::nvvm_wmma_load_c_f32_row_global_stride:
703 return getWmmaLdVariant(
704 Variant, /*Stride=*/true,
705 WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_row_global));
706
707 //
708 // WMMA_STORE_D f16
709 //
710 case Intrinsic::nvvm_wmma_store_d_f16_col:
711 return getWmmaLdVariant(Variant, /*Stride=*/false,
712 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_col));
713 case Intrinsic::nvvm_wmma_store_d_f16_row:
714 return getWmmaLdVariant(Variant, /*Stride=*/false,
715 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_row));
716 case Intrinsic::nvvm_wmma_store_d_f16_col_stride:
717 return getWmmaLdVariant(Variant, /*Stride=*/true,
718 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_col));
719 case Intrinsic::nvvm_wmma_store_d_f16_row_stride:
720 return getWmmaLdVariant(Variant, /*Stride=*/true,
721 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_row));
722 case Intrinsic::nvvm_wmma_store_d_f16_col_shared:
723 return getWmmaLdVariant(
724 Variant, /*Stride=*/false,
725 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_col_shared));
726 case Intrinsic::nvvm_wmma_store_d_f16_row_shared:
727 return getWmmaLdVariant(
728 Variant, /*Stride=*/false,
729 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_row_shared));
730 case Intrinsic::nvvm_wmma_store_d_f16_col_shared_stride:
731 return getWmmaLdVariant(
732 Variant, /*Stride=*/true,
733 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_col_shared));
734 case Intrinsic::nvvm_wmma_store_d_f16_row_shared_stride:
735 return getWmmaLdVariant(
736 Variant, /*Stride=*/true,
737 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_row_shared));
738 case Intrinsic::nvvm_wmma_store_d_f16_col_global:
739 return getWmmaLdVariant(
740 Variant, /*Stride=*/false,
741 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_col_global));
742 case Intrinsic::nvvm_wmma_store_d_f16_row_global:
743 return getWmmaLdVariant(
744 Variant, /*Stride=*/false,
745 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_row_global));
746 case Intrinsic::nvvm_wmma_store_d_f16_col_global_stride:
747 return getWmmaLdVariant(
748 Variant, /*Stride=*/true,
749 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_col_global));
750 case Intrinsic::nvvm_wmma_store_d_f16_row_global_stride:
751 return getWmmaLdVariant(
752 Variant, /*Stride=*/true,
753 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_row_global));
754
755 //
756 // WMMA_STORE_D f32
757 //
758 case Intrinsic::nvvm_wmma_store_d_f32_col:
759 return getWmmaLdVariant(Variant, /*Stride=*/false,
760 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_col));
761 case Intrinsic::nvvm_wmma_store_d_f32_row:
762 return getWmmaLdVariant(Variant, /*Stride=*/false,
763 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_row));
764 case Intrinsic::nvvm_wmma_store_d_f32_col_stride:
765 return getWmmaLdVariant(Variant, /*Stride=*/true,
766 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_col));
767 case Intrinsic::nvvm_wmma_store_d_f32_row_stride:
768 return getWmmaLdVariant(Variant, /*Stride=*/true,
769 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_row));
770 case Intrinsic::nvvm_wmma_store_d_f32_col_shared:
771 return getWmmaLdVariant(
772 Variant, /*Stride=*/false,
773 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_col_shared));
774 case Intrinsic::nvvm_wmma_store_d_f32_row_shared:
775 return getWmmaLdVariant(
776 Variant, /*Stride=*/false,
777 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_row_shared));
778 case Intrinsic::nvvm_wmma_store_d_f32_col_shared_stride:
779 return getWmmaLdVariant(
780 Variant, /*Stride=*/true,
781 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_col_shared));
782 case Intrinsic::nvvm_wmma_store_d_f32_row_shared_stride:
783 return getWmmaLdVariant(
784 Variant, /*Stride=*/true,
785 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_row_shared));
786 case Intrinsic::nvvm_wmma_store_d_f32_col_global:
787 return getWmmaLdVariant(
788 Variant, /*Stride=*/false,
789 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_col_global));
790 case Intrinsic::nvvm_wmma_store_d_f32_row_global:
791 return getWmmaLdVariant(
792 Variant, /*Stride=*/false,
793 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_row_global));
794 case Intrinsic::nvvm_wmma_store_d_f32_col_global_stride:
795 return getWmmaLdVariant(
796 Variant, /*Stride=*/true,
797 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_col_global));
798 case Intrinsic::nvvm_wmma_store_d_f32_row_global_stride:
799 return getWmmaLdVariant(
800 Variant, /*Stride=*/true,
801 WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_row_global));
802 }
803}
804#undef WMMA_VARIANTS
805
Justin Bogner8d83fb62016-05-13 21:12:53 +0000806bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000807 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
Artem Belevich3bafc2f2017-10-12 18:27:55 +0000808 if (getWmmaLdStOpcode(IID))
809 return tryWMMA_LDST(N);
810
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000811 switch (IID) {
812 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000813 return false;
Artem Belevich55dcf5e2017-11-14 19:14:00 +0000814 case Intrinsic::nvvm_match_all_sync_i32p:
815 case Intrinsic::nvvm_match_all_sync_i64p:
816 SelectMatchAll(N);
817 return true;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000818 case Intrinsic::nvvm_ldg_global_f:
819 case Intrinsic::nvvm_ldg_global_i:
820 case Intrinsic::nvvm_ldg_global_p:
821 case Intrinsic::nvvm_ldu_global_f:
822 case Intrinsic::nvvm_ldu_global_i:
823 case Intrinsic::nvvm_ldu_global_p:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000824 return tryLDGLDU(N);
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000825 }
826}
827
Artem Belevich64dc9be2017-01-13 20:56:17 +0000828// There's no way to specify FP16 immediates in .f16 ops, so we have to
829// load them into an .f16 register first.
830bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
831 if (N->getValueType(0) != MVT::f16)
832 return false;
833 SDValue Val = CurDAG->getTargetConstantFP(
834 cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
835 SDNode *LoadConstF16 =
836 CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
837 ReplaceNode(N, LoadConstF16);
838 return true;
839}
840
Artem Belevich620db1f2017-02-23 22:38:24 +0000841// Map ISD:CONDCODE value to appropriate CmpMode expected by
842// NVPTXInstPrinter::printCmpMode()
843static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
844 using NVPTX::PTXCmpMode::CmpMode;
845 unsigned PTXCmpMode = [](ISD::CondCode CC) {
846 switch (CC) {
847 default:
848 llvm_unreachable("Unexpected condition code.");
849 case ISD::SETOEQ:
850 return CmpMode::EQ;
851 case ISD::SETOGT:
852 return CmpMode::GT;
853 case ISD::SETOGE:
854 return CmpMode::GE;
855 case ISD::SETOLT:
856 return CmpMode::LT;
857 case ISD::SETOLE:
858 return CmpMode::LE;
859 case ISD::SETONE:
860 return CmpMode::NE;
861 case ISD::SETO:
862 return CmpMode::NUM;
863 case ISD::SETUO:
864 return CmpMode::NotANumber;
865 case ISD::SETUEQ:
866 return CmpMode::EQU;
867 case ISD::SETUGT:
868 return CmpMode::GTU;
869 case ISD::SETUGE:
870 return CmpMode::GEU;
871 case ISD::SETULT:
872 return CmpMode::LTU;
873 case ISD::SETULE:
874 return CmpMode::LEU;
875 case ISD::SETUNE:
876 return CmpMode::NEU;
877 case ISD::SETEQ:
878 return CmpMode::EQ;
879 case ISD::SETGT:
880 return CmpMode::GT;
881 case ISD::SETGE:
882 return CmpMode::GE;
883 case ISD::SETLT:
884 return CmpMode::LT;
885 case ISD::SETLE:
886 return CmpMode::LE;
887 case ISD::SETNE:
888 return CmpMode::NE;
889 }
890 }(CondCode.get());
891
892 if (FTZ)
893 PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
894
895 return PTXCmpMode;
896}
897
898bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
899 unsigned PTXCmpMode =
900 getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
901 SDLoc DL(N);
902 SDNode *SetP = CurDAG->getMachineNode(
903 NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
904 N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
905 ReplaceNode(N, SetP);
906 return true;
907}
908
909// Find all instances of extract_vector_elt that use this v2f16 vector
910// and coalesce them into a scattering move instruction.
911bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
912 SDValue Vector = N->getOperand(0);
913
914 // We only care about f16x2 as it's the only real vector type we
915 // need to deal with.
916 if (Vector.getSimpleValueType() != MVT::v2f16)
917 return false;
918
919 // Find and record all uses of this vector that extract element 0 or 1.
920 SmallVector<SDNode *, 4> E0, E1;
921 for (const auto &U : Vector.getNode()->uses()) {
922 if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
923 continue;
924 if (U->getOperand(0) != Vector)
925 continue;
926 if (const ConstantSDNode *IdxConst =
927 dyn_cast<ConstantSDNode>(U->getOperand(1))) {
928 if (IdxConst->getZExtValue() == 0)
929 E0.push_back(U);
930 else if (IdxConst->getZExtValue() == 1)
931 E1.push_back(U);
932 else
933 llvm_unreachable("Invalid vector index.");
934 }
935 }
936
937 // There's no point scattering f16x2 if we only ever access one
938 // element of it.
939 if (E0.empty() || E1.empty())
940 return false;
941
942 unsigned Op = NVPTX::SplitF16x2;
943 // If the vector has been BITCAST'ed from i32, we can use original
944 // value directly and avoid register-to-register move.
945 SDValue Source = Vector;
946 if (Vector->getOpcode() == ISD::BITCAST) {
947 Op = NVPTX::SplitI32toF16x2;
948 Source = Vector->getOperand(0);
949 }
950 // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
951 // into f16,f16 SplitF16x2(V)
952 SDNode *ScatterOp =
953 CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
954 for (auto *Node : E0)
955 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
956 for (auto *Node : E1)
957 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
958
959 return true;
960}
961
Eric Christopher9745b3a2015-01-30 01:41:01 +0000962static unsigned int getCodeAddrSpace(MemSDNode *N) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +0000963 const Value *Src = N->getMemOperand()->getValue();
Justin Holewinskib96d1392013-06-10 13:29:47 +0000964
Justin Holewinskiae556d32012-05-04 20:18:50 +0000965 if (!Src)
Justin Holewinskib96d1392013-06-10 13:29:47 +0000966 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000967
Craig Toppere3dcce92015-08-01 22:20:21 +0000968 if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000969 switch (PT->getAddressSpace()) {
Justin Holewinskib96d1392013-06-10 13:29:47 +0000970 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
971 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
972 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
973 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
974 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
975 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
976 default: break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000977 }
978 }
Justin Holewinskib96d1392013-06-10 13:29:47 +0000979 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000980}
981
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000982static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000983 unsigned CodeAddrSpace, MachineFunction *F) {
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000984 // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
985 // space.
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000986 //
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000987 // We have two ways of identifying invariant loads: Loads may be explicitly
988 // marked as invariant, or we may infer them to be invariant.
989 //
Justin Lebarfaaf2d22018-02-28 23:58:05 +0000990 // We currently infer invariance for loads from
991 // - constant global variables, and
992 // - kernel function pointer params that are noalias (i.e. __restrict) and
993 // never written to.
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000994 //
995 // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
996 // not during the SelectionDAG phase).
997 //
998 // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
999 // explicitly invariant loads because these are how clang tells us to use ldg
1000 // when the user uses a builtin.
1001 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001002 return false;
Justin Lebar6d6b11a2016-09-11 01:39:04 +00001003
1004 if (N->isInvariant())
1005 return true;
1006
Justin Lebarfaaf2d22018-02-28 23:58:05 +00001007 bool IsKernelFn = isKernelFunction(F->getFunction());
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001008
Justin Lebarfaaf2d22018-02-28 23:58:05 +00001009 // We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly
1010 // because the former looks through phi nodes while the latter does not. We
1011 // need to look through phi nodes to handle pointer induction variables.
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +00001012 SmallVector<Value *, 8> Objs;
1013 GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
1014 Objs, F->getDataLayout());
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001015
Justin Lebarfaaf2d22018-02-28 23:58:05 +00001016 return all_of(Objs, [&](Value *V) {
1017 if (auto *A = dyn_cast<const Argument>(V))
1018 return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
1019 if (auto *GV = dyn_cast<const GlobalVariable>(V))
1020 return GV->isConstant();
1021 return false;
1022 });
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001023}
1024
Justin Bogner8d83fb62016-05-13 21:12:53 +00001025bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00001026 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
1027 switch (IID) {
1028 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001029 return false;
Justin Holewinski30d56a72014-04-09 15:39:15 +00001030 case Intrinsic::nvvm_texsurf_handle_internal:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001031 SelectTexSurfHandle(N);
1032 return true;
Artem Belevich3bafc2f2017-10-12 18:27:55 +00001033 case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f16:
1034 case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f16_satfinite:
1035 case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f32:
1036 case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f32_satfinite:
1037 case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f16:
1038 case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f16_satfinite:
1039 case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f32:
1040 case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f32_satfinite:
1041 case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f16:
1042 case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f16_satfinite:
1043 case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f32:
1044 case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f32_satfinite:
1045 case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f16:
1046 case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f16_satfinite:
1047 case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f32:
1048 case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f32_satfinite:
1049 case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f16:
1050 case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f16_satfinite:
1051 case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f32:
1052 case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f32_satfinite:
1053 case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f16:
1054 case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f16_satfinite:
1055 case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f32:
1056 case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f32_satfinite:
1057 case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f16:
1058 case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f16_satfinite:
1059 case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f32:
1060 case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f32_satfinite:
1061 case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f16:
1062 case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f16_satfinite:
1063 case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f32:
1064 case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f32_satfinite:
1065 return tryWMMA_MMA(N);
Justin Holewinski30d56a72014-04-09 15:39:15 +00001066 }
1067}
1068
Justin Bogner8d83fb62016-05-13 21:12:53 +00001069void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00001070 // Op 0 is the intrinsic ID
1071 SDValue Wrapper = N->getOperand(1);
1072 SDValue GlobalVal = Wrapper.getOperand(0);
Justin Bogner8d83fb62016-05-13 21:12:53 +00001073 ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
1074 MVT::i64, GlobalVal));
Justin Holewinski30d56a72014-04-09 15:39:15 +00001075}
1076
Artem Belevichbab95c72017-09-26 17:07:23 +00001077void NVPTXDAGToDAGISel::SelectMatchAll(SDNode *N) {
1078 SDLoc DL(N);
1079 enum { IS_I64 = 4, HAS_CONST_VALUE = 2, HAS_CONST_MASK = 1 };
Artem Belevich55dcf5e2017-11-14 19:14:00 +00001080 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
Artem Belevichbab95c72017-09-26 17:07:23 +00001081 unsigned OpcodeIndex =
1082 (IID == Intrinsic::nvvm_match_all_sync_i64p) ? IS_I64 : 0;
Artem Belevich55dcf5e2017-11-14 19:14:00 +00001083 SDValue MaskOp = N->getOperand(2);
1084 SDValue ValueOp = N->getOperand(3);
Artem Belevichbab95c72017-09-26 17:07:23 +00001085 if (ConstantSDNode *ValueConst = dyn_cast<ConstantSDNode>(ValueOp)) {
1086 OpcodeIndex |= HAS_CONST_VALUE;
1087 ValueOp = CurDAG->getTargetConstant(ValueConst->getZExtValue(), DL,
1088 ValueConst->getValueType(0));
1089 }
1090 if (ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(MaskOp)) {
1091 OpcodeIndex |= HAS_CONST_MASK;
1092 MaskOp = CurDAG->getTargetConstant(MaskConst->getZExtValue(), DL,
1093 MaskConst->getValueType(0));
1094 }
1095 // Maps {IS_I64, HAS_CONST_VALUE, HAS_CONST_MASK} -> opcode
1096 unsigned Opcodes[8] = {
1097 NVPTX::MATCH_ALLP_SYNC_32rr, NVPTX::MATCH_ALLP_SYNC_32ri,
1098 NVPTX::MATCH_ALLP_SYNC_32ir, NVPTX::MATCH_ALLP_SYNC_32ii,
1099 NVPTX::MATCH_ALLP_SYNC_64rr, NVPTX::MATCH_ALLP_SYNC_64ri,
1100 NVPTX::MATCH_ALLP_SYNC_64ir, NVPTX::MATCH_ALLP_SYNC_64ii};
Artem Belevich55dcf5e2017-11-14 19:14:00 +00001101 SDNode *NewNode = CurDAG->getMachineNode(
1102 Opcodes[OpcodeIndex], DL, {ValueOp->getValueType(0), MVT::i1, MVT::Other},
1103 {MaskOp, ValueOp});
Artem Belevichbab95c72017-09-26 17:07:23 +00001104 ReplaceNode(N, NewNode);
1105}
1106
Justin Bogner8d83fb62016-05-13 21:12:53 +00001107void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +00001108 SDValue Src = N->getOperand(0);
1109 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
1110 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
1111 unsigned DstAddrSpace = CastN->getDestAddressSpace();
1112
1113 assert(SrcAddrSpace != DstAddrSpace &&
1114 "addrspacecast must be between different address spaces");
1115
1116 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
1117 // Specific to generic
1118 unsigned Opc;
1119 switch (SrcAddrSpace) {
1120 default: report_fatal_error("Bad address space in addrspacecast");
1121 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +00001122 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +00001123 break;
1124 case ADDRESS_SPACE_SHARED:
Eric Christopher02389e32015-02-19 00:08:27 +00001125 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +00001126 break;
1127 case ADDRESS_SPACE_CONST:
Eric Christopher02389e32015-02-19 00:08:27 +00001128 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +00001129 break;
1130 case ADDRESS_SPACE_LOCAL:
Eric Christopher02389e32015-02-19 00:08:27 +00001131 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +00001132 break;
1133 }
Justin Bogner8d83fb62016-05-13 21:12:53 +00001134 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
1135 Src));
1136 return;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +00001137 } else {
1138 // Generic to specific
1139 if (SrcAddrSpace != 0)
1140 report_fatal_error("Cannot cast between two non-generic address spaces");
1141 unsigned Opc;
1142 switch (DstAddrSpace) {
1143 default: report_fatal_error("Bad address space in addrspacecast");
1144 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +00001145 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
1146 : NVPTX::cvta_to_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +00001147 break;
1148 case ADDRESS_SPACE_SHARED:
Eric Christopher02389e32015-02-19 00:08:27 +00001149 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
1150 : NVPTX::cvta_to_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +00001151 break;
1152 case ADDRESS_SPACE_CONST:
Eric Christopher02389e32015-02-19 00:08:27 +00001153 Opc =
1154 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +00001155 break;
1156 case ADDRESS_SPACE_LOCAL:
Eric Christopher02389e32015-02-19 00:08:27 +00001157 Opc =
1158 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +00001159 break;
Jingyue Wua2f60272015-06-04 21:28:26 +00001160 case ADDRESS_SPACE_PARAM:
1161 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
1162 : NVPTX::nvvm_ptr_gen_to_param;
1163 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +00001164 }
Justin Bogner8d83fb62016-05-13 21:12:53 +00001165 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
1166 Src));
1167 return;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +00001168 }
1169}
1170
Artem Belevichee7dd122017-03-02 19:14:14 +00001171// Helper function template to reduce amount of boilerplate code for
1172// opcode selection.
1173static Optional<unsigned> pickOpcodeForVT(
1174 MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
1175 unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
1176 unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
1177 switch (VT) {
1178 case MVT::i1:
1179 case MVT::i8:
1180 return Opcode_i8;
1181 case MVT::i16:
1182 return Opcode_i16;
1183 case MVT::i32:
1184 return Opcode_i32;
1185 case MVT::i64:
1186 return Opcode_i64;
1187 case MVT::f16:
1188 return Opcode_f16;
1189 case MVT::v2f16:
1190 return Opcode_f16x2;
1191 case MVT::f32:
1192 return Opcode_f32;
1193 case MVT::f64:
1194 return Opcode_f64;
1195 default:
1196 return None;
1197 }
1198}
1199
Justin Bogner8d83fb62016-05-13 21:12:53 +00001200bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001201 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001202 LoadSDNode *LD = cast<LoadSDNode>(N);
1203 EVT LoadedVT = LD->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +00001204 SDNode *NVPTXLD = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001205
1206 // do not support pre/post inc/dec
1207 if (LD->isIndexed())
Justin Bogner8d83fb62016-05-13 21:12:53 +00001208 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001209
1210 if (!LoadedVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +00001211 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001212
1213 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00001214 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001215
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +00001216 if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00001217 return tryLDGLDU(N);
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001218 }
1219
Justin Holewinskiae556d32012-05-04 20:18:50 +00001220 // Volatile Setting
1221 // - .volatile is only availalble for .global and .shared
1222 bool isVolatile = LD->isVolatile();
1223 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1224 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1225 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1226 isVolatile = false;
1227
Justin Holewinskiae556d32012-05-04 20:18:50 +00001228 // Type Setting: fromType + fromTypeWidth
1229 //
1230 // Sign : ISD::SEXTLOAD
1231 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
1232 // type is integer
1233 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
Artem Belevich620db1f2017-02-23 22:38:24 +00001234 MVT SimpleVT = LoadedVT.getSimpleVT();
Justin Holewinskiae556d32012-05-04 20:18:50 +00001235 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +00001236 // Read at least 8 bits (predicates are stored as 8-bit values)
1237 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskiae556d32012-05-04 20:18:50 +00001238 unsigned int fromType;
Artem Belevich620db1f2017-02-23 22:38:24 +00001239
1240 // Vector Setting
1241 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1242 if (SimpleVT.isVector()) {
1243 assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
1244 // v2f16 is loaded using ld.b32
1245 fromTypeWidth = 32;
1246 }
1247
Justin Holewinskiae556d32012-05-04 20:18:50 +00001248 if ((LD->getExtensionType() == ISD::SEXTLOAD))
1249 fromType = NVPTX::PTXLdStInstCode::Signed;
1250 else if (ScalarVT.isFloatingPoint())
Artem Belevich64dc9be2017-01-13 20:56:17 +00001251 // f16 uses .b16 as its storage type.
1252 fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1253 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001254 else
1255 fromType = NVPTX::PTXLdStInstCode::Unsigned;
1256
1257 // Create the machine instruction DAG
1258 SDValue Chain = N->getOperand(0);
1259 SDValue N1 = N->getOperand(1);
1260 SDValue Addr;
1261 SDValue Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +00001262 Optional<unsigned> Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +00001263 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001264
1265 if (SelectDirectAddr(N1, Addr)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00001266 Opcode = pickOpcodeForVT(
1267 TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
1268 NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
1269 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
1270 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +00001271 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001272 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
1273 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
1274 getI32Imm(fromTypeWidth, dl), Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001275 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
1276 MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001277 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
1278 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00001279 Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
1280 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
1281 NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
1282 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
1283 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +00001284 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001285 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
1286 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
1287 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001288 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
1289 MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001290 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
1291 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00001292 if (TM.is64Bit())
1293 Opcode = pickOpcodeForVT(
1294 TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
1295 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
1296 NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
1297 else
1298 Opcode = pickOpcodeForVT(
1299 TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
1300 NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
1301 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
1302 if (!Opcode)
1303 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001304 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
1305 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
1306 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001307 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
1308 MVT::Other, Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001309 } else {
Artem Belevichee7dd122017-03-02 19:14:14 +00001310 if (TM.is64Bit())
1311 Opcode = pickOpcodeForVT(
1312 TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
1313 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
1314 NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
1315 NVPTX::LD_f64_areg_64);
1316 else
1317 Opcode = pickOpcodeForVT(
1318 TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
1319 NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
1320 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
1321 if (!Opcode)
1322 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001323 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
1324 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
1325 getI32Imm(fromTypeWidth, dl), N1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001326 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
1327 MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001328 }
1329
Justin Bogner8d83fb62016-05-13 21:12:53 +00001330 if (!NVPTXLD)
1331 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001332
Justin Bogner8d83fb62016-05-13 21:12:53 +00001333 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1334 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1335 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1336
1337 ReplaceNode(N, NVPTXLD);
1338 return true;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001339}
1340
Justin Bogner8d83fb62016-05-13 21:12:53 +00001341bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001342
1343 SDValue Chain = N->getOperand(0);
1344 SDValue Op1 = N->getOperand(1);
1345 SDValue Addr, Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +00001346 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001347 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001348 SDNode *LD;
1349 MemSDNode *MemSD = cast<MemSDNode>(N);
1350 EVT LoadedVT = MemSD->getMemoryVT();
1351
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001352 if (!LoadedVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +00001353 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001354
1355 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00001356 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001357
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +00001358 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00001359 return tryLDGLDU(N);
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001360 }
1361
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001362 // Volatile Setting
1363 // - .volatile is only availalble for .global and .shared
1364 bool IsVolatile = MemSD->isVolatile();
1365 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1366 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1367 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1368 IsVolatile = false;
1369
1370 // Vector Setting
1371 MVT SimpleVT = LoadedVT.getSimpleVT();
1372
1373 // Type Setting: fromType + fromTypeWidth
1374 //
1375 // Sign : ISD::SEXTLOAD
1376 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
1377 // type is integer
1378 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1379 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +00001380 // Read at least 8 bits (predicates are stored as 8-bit values)
1381 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001382 unsigned int FromType;
1383 // The last operand holds the original LoadSDNode::getExtensionType() value
Justin Holewinski0497ab12013-03-30 14:29:21 +00001384 unsigned ExtensionType = cast<ConstantSDNode>(
1385 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001386 if (ExtensionType == ISD::SEXTLOAD)
1387 FromType = NVPTX::PTXLdStInstCode::Signed;
1388 else if (ScalarVT.isFloatingPoint())
Artem Belevich620db1f2017-02-23 22:38:24 +00001389 FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1390 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001391 else
1392 FromType = NVPTX::PTXLdStInstCode::Unsigned;
1393
1394 unsigned VecType;
1395
1396 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001397 case NVPTXISD::LoadV2:
1398 VecType = NVPTX::PTXLdStInstCode::V2;
1399 break;
1400 case NVPTXISD::LoadV4:
1401 VecType = NVPTX::PTXLdStInstCode::V4;
1402 break;
1403 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001404 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001405 }
1406
1407 EVT EltVT = N->getValueType(0);
1408
Artem Belevich620db1f2017-02-23 22:38:24 +00001409 // v8f16 is a special case. PTX doesn't have ld.v8.f16
1410 // instruction. Instead, we split the vector into v2f16 chunks and
1411 // load them with ld.v4.b32.
1412 if (EltVT == MVT::v2f16) {
1413 assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
1414 EltVT = MVT::i32;
1415 FromType = NVPTX::PTXLdStInstCode::Untyped;
1416 FromTypeWidth = 32;
1417 }
1418
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001419 if (SelectDirectAddr(Op1, Addr)) {
1420 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001421 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001422 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001423 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001424 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1425 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1426 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1427 NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1428 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001429 break;
1430 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001431 Opcode =
1432 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
1433 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
1434 NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1435 NVPTX::LDV_f32_v4_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001436 break;
1437 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001438 if (!Opcode)
1439 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001440 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1441 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1442 getI32Imm(FromTypeWidth, DL), Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001443 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001444 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1445 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001446 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001447 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001448 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001449 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001450 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1451 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1452 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1453 NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1454 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001455 break;
1456 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001457 Opcode =
1458 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
1459 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
1460 NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1461 NVPTX::LDV_f32_v4_asi, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001462 break;
1463 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001464 if (!Opcode)
1465 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001466 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1467 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1468 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001469 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001470 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1471 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1472 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001473 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001474 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001475 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001476 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001477 Opcode = pickOpcodeForVT(
1478 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
1479 NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1480 NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1481 NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1482 NVPTX::LDV_f64_v2_ari_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001483 break;
1484 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001485 Opcode = pickOpcodeForVT(
1486 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
1487 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
1488 NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1489 NVPTX::LDV_f32_v4_ari_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001490 break;
1491 }
1492 } else {
1493 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001494 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001495 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001496 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001497 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1498 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1499 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1500 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1501 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001502 break;
1503 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001504 Opcode =
1505 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
1506 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
1507 NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1508 NVPTX::LDV_f32_v4_ari, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001509 break;
1510 }
1511 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001512 if (!Opcode)
1513 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001514 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1515 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1516 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001517
Artem Belevichee7dd122017-03-02 19:14:14 +00001518 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001519 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001520 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001521 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001522 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001523 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001524 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001525 Opcode = pickOpcodeForVT(
1526 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
1527 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1528 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1529 NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1530 NVPTX::LDV_f64_v2_areg_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001531 break;
1532 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001533 Opcode = pickOpcodeForVT(
1534 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
1535 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
1536 NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1537 NVPTX::LDV_f32_v4_areg_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001538 break;
1539 }
1540 } else {
1541 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001542 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001543 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001544 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001545 Opcode =
1546 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
1547 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1548 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1549 NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1550 NVPTX::LDV_f64_v2_areg);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001551 break;
1552 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001553 Opcode = pickOpcodeForVT(
1554 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
1555 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
1556 NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1557 NVPTX::LDV_f32_v4_areg, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001558 break;
1559 }
1560 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001561 if (!Opcode)
1562 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001563 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1564 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1565 getI32Imm(FromTypeWidth, DL), Op1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001566 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001567 }
1568
1569 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1570 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1571 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1572
Justin Bogner8d83fb62016-05-13 21:12:53 +00001573 ReplaceNode(N, LD);
1574 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001575}
1576
Justin Bogner8d83fb62016-05-13 21:12:53 +00001577bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001578
1579 SDValue Chain = N->getOperand(0);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001580 SDValue Op1;
1581 MemSDNode *Mem;
1582 bool IsLDG = true;
1583
Justin Holewinskic7997922016-04-05 12:38:01 +00001584 // If this is an LDG intrinsic, the address is the third operand. If its an
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001585 // LDG/LDU SD node (from custom vector handling), then its the second operand
1586 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1587 Op1 = N->getOperand(2);
1588 Mem = cast<MemIntrinsicSDNode>(N);
1589 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1590 switch (IID) {
1591 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001592 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001593 case Intrinsic::nvvm_ldg_global_f:
1594 case Intrinsic::nvvm_ldg_global_i:
1595 case Intrinsic::nvvm_ldg_global_p:
1596 IsLDG = true;
1597 break;
1598 case Intrinsic::nvvm_ldu_global_f:
1599 case Intrinsic::nvvm_ldu_global_i:
1600 case Intrinsic::nvvm_ldu_global_p:
1601 IsLDG = false;
1602 break;
1603 }
1604 } else {
1605 Op1 = N->getOperand(1);
1606 Mem = cast<MemSDNode>(N);
1607 }
1608
Artem Belevichee7dd122017-03-02 19:14:14 +00001609 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001610 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001611 SDNode *LD;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001612 SDValue Base, Offset, Addr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00001613
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001614 EVT EltVT = Mem->getMemoryVT();
Justin Holewinskic7997922016-04-05 12:38:01 +00001615 unsigned NumElts = 1;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001616 if (EltVT.isVector()) {
Justin Holewinskic7997922016-04-05 12:38:01 +00001617 NumElts = EltVT.getVectorNumElements();
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001618 EltVT = EltVT.getVectorElementType();
1619 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001620
Justin Holewinskic7997922016-04-05 12:38:01 +00001621 // Build the "promoted" result VTList for the load. If we are really loading
1622 // i8s, then the return type will be promoted to i16 since we do not expose
1623 // 8-bit registers in NVPTX.
1624 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1625 SmallVector<EVT, 5> InstVTs;
1626 for (unsigned i = 0; i != NumElts; ++i) {
1627 InstVTs.push_back(NodeVT);
1628 }
1629 InstVTs.push_back(MVT::Other);
1630 SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1631
Justin Holewinskie40e9292013-07-01 12:58:52 +00001632 if (SelectDirectAddr(Op1, Addr)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001633 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001634 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001635 return false;
Justin Lebarfaaf2d22018-02-28 23:58:05 +00001636 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001637 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001638 if (IsLDG)
1639 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1640 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1641 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1642 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1643 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1644 NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1645 NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1646 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1647 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1648 else
1649 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1650 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1651 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1652 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1653 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1654 NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1655 NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1656 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1657 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001658 break;
Justin Lebarfaaf2d22018-02-28 23:58:05 +00001659 case NVPTXISD::LoadV2:
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001660 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001661 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1662 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1663 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1664 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1665 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1666 NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1667 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1668 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1669 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001670 break;
1671 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001672 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1673 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1674 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1675 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1676 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1677 NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1678 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1679 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1680 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001681 break;
Justin Lebarfaaf2d22018-02-28 23:58:05 +00001682 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001683 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001684 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1685 NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1686 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1687 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
1688 NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1689 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1690 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001691 break;
1692 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001693 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1694 NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1695 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1696 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
1697 NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1698 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1699 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001700 break;
1701 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001702 if (!Opcode)
1703 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001704 SDValue Ops[] = { Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001705 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001706 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1707 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1708 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001709 switch (N->getOpcode()) {
1710 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001711 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001712 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001713 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001714 if (IsLDG)
1715 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1716 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1717 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1718 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1719 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1720 NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1721 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1722 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1723 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1724 else
1725 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1726 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1727 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1728 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1729 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1730 NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1731 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1732 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1733 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001734 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001735 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001736 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001737 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1738 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1739 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1740 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1741 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1742 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1743 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1744 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1745 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001746 break;
1747 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001748 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1749 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1750 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1751 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1752 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1753 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1754 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1755 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1756 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001757 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001758 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001759 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001760 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1761 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1762 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1763 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
1764 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1765 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1766 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001767 break;
1768 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001769 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1770 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1771 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1772 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
1773 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1774 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1775 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001776 break;
1777 }
1778 } else {
1779 switch (N->getOpcode()) {
1780 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001781 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001782 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001783 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001784 if (IsLDG)
1785 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1786 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1787 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1788 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1789 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1790 NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1791 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1792 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1793 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1794 else
1795 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1796 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1797 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1798 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1799 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1800 NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1801 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1802 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1803 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001804 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001805 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001806 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001807 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1808 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1809 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1810 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1811 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1812 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1813 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1814 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1815 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001816 break;
1817 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001818 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1819 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1820 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1821 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1822 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1823 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1824 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1825 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1826 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001827 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001828 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001829 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001830 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1831 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1832 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1833 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
1834 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1835 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1836 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001837 break;
1838 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001839 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1840 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1841 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1842 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
1843 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1844 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1845 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001846 break;
1847 }
1848 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001849 if (!Opcode)
1850 return false;
1851 SDValue Ops[] = {Base, Offset, Chain};
1852 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001853 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001854 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001855 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001856 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001857 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001858 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001859 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001860 if (IsLDG)
1861 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1862 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1863 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1864 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1865 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1866 NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1867 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1868 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1869 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1870 else
1871 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1872 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1873 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1874 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1875 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1876 NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1877 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1878 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1879 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001880 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001881 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001882 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001883 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1884 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1885 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1886 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1887 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1888 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1889 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1890 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1891 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001892 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001893 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001894 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1895 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1896 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1897 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1898 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1899 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1900 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1901 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1902 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001903 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001904 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001905 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001906 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1907 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1908 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1909 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
1910 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1911 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1912 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001913 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001914 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001915 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1916 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1917 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1918 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
1919 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1920 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1921 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001922 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001923 }
Justin Holewinskie40e9292013-07-01 12:58:52 +00001924 } else {
1925 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001926 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001927 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001928 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001929 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001930 if (IsLDG)
1931 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1932 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1933 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1934 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1935 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1936 NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1937 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1938 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1939 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1940 else
1941 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1942 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1943 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1944 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1945 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1946 NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1947 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1948 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1949 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001950 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001951 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001952 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001953 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1954 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1955 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1956 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1957 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1958 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1959 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1960 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1961 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001962 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001963 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001964 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1965 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1966 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1967 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1968 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1969 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1970 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1971 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1972 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001973 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001974 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001975 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001976 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1977 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1978 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1979 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
1980 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1981 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1982 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001983 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001984 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001985 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1986 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1987 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1988 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
1989 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1990 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1991 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001992 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001993 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001994 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001995 if (!Opcode)
1996 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001997 SDValue Ops[] = { Op1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001998 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001999 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002000
2001 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00002002 MemRefs0[0] = Mem->getMemOperand();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002003 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
2004
Justin Holewinskic7997922016-04-05 12:38:01 +00002005 // For automatic generation of LDG (through SelectLoad[Vector], not the
2006 // intrinsics), we may have an extending load like:
2007 //
2008 // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
2009 //
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002010 // In this case, the matching logic above will select a load for the original
2011 // memory type (in this case, i8) and our types will not match (the node needs
2012 // to return an i32 in this case). Our LDG/LDU nodes do not support the
2013 // concept of sign-/zero-extension, so emulate it here by adding an explicit
2014 // CVT instruction. Ptxas should clean up any redundancies here.
2015
Justin Holewinskic7997922016-04-05 12:38:01 +00002016 EVT OrigType = N->getValueType(0);
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002017 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
Justin Holewinskic7997922016-04-05 12:38:01 +00002018
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002019 if (OrigType != EltVT && LdNode) {
2020 // We have an extending-load. The instruction we selected operates on the
2021 // smaller type, but the SDNode we are replacing has the larger type. We
2022 // need to emit a CVT to make the types match.
2023 bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
2024 unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
2025 EltVT.getSimpleVT(), IsSigned);
Justin Holewinskic7997922016-04-05 12:38:01 +00002026
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002027 // For each output value, apply the manual sign/zero-extension and make sure
2028 // all users of the load go through that CVT.
Justin Holewinskic7997922016-04-05 12:38:01 +00002029 for (unsigned i = 0; i != NumElts; ++i) {
2030 SDValue Res(LD, i);
2031 SDValue OrigVal(N, i);
2032
2033 SDNode *CvtNode =
2034 CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002035 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2036 DL, MVT::i32));
Justin Holewinskic7997922016-04-05 12:38:01 +00002037 ReplaceUses(OrigVal, SDValue(CvtNode, 0));
2038 }
2039 }
2040
Justin Bogner8d83fb62016-05-13 21:12:53 +00002041 ReplaceNode(N, LD);
2042 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002043}
2044
Justin Bogner8d83fb62016-05-13 21:12:53 +00002045bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00002046 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002047 StoreSDNode *ST = cast<StoreSDNode>(N);
2048 EVT StoreVT = ST->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +00002049 SDNode *NVPTXST = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002050
2051 // do not support pre/post inc/dec
2052 if (ST->isIndexed())
Justin Bogner8d83fb62016-05-13 21:12:53 +00002053 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002054
2055 if (!StoreVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +00002056 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002057
2058 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00002059 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002060
2061 // Volatile Setting
2062 // - .volatile is only availalble for .global and .shared
2063 bool isVolatile = ST->isVolatile();
2064 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2065 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2066 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2067 isVolatile = false;
2068
2069 // Vector Setting
2070 MVT SimpleVT = StoreVT.getSimpleVT();
2071 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002072
2073 // Type Setting: toType + toTypeWidth
2074 // - for integer type, always use 'u'
2075 //
2076 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00002077 unsigned toTypeWidth = ScalarVT.getSizeInBits();
Artem Belevich620db1f2017-02-23 22:38:24 +00002078 if (SimpleVT.isVector()) {
2079 assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
2080 // v2f16 is stored using st.b32
2081 toTypeWidth = 32;
2082 }
2083
Justin Holewinskiae556d32012-05-04 20:18:50 +00002084 unsigned int toType;
2085 if (ScalarVT.isFloatingPoint())
Artem Belevich64dc9be2017-01-13 20:56:17 +00002086 // f16 uses .b16 as its storage type.
2087 toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
2088 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002089 else
2090 toType = NVPTX::PTXLdStInstCode::Unsigned;
2091
2092 // Create the machine instruction DAG
2093 SDValue Chain = N->getOperand(0);
2094 SDValue N1 = N->getOperand(1);
2095 SDValue N2 = N->getOperand(2);
2096 SDValue Addr;
2097 SDValue Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +00002098 Optional<unsigned> Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +00002099 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002100
2101 if (SelectDirectAddr(N2, Addr)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00002102 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
2103 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
2104 NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
2105 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
2106 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +00002107 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002108 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2109 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2110 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2111 Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00002112 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00002113 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2114 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00002115 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
2116 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
2117 NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
2118 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
2119 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +00002120 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002121 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2122 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2123 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2124 Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00002125 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00002126 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2127 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00002128 if (TM.is64Bit())
2129 Opcode = pickOpcodeForVT(
2130 SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
2131 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
2132 NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
2133 else
2134 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
2135 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
2136 NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
2137 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
2138 if (!Opcode)
2139 return false;
2140
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002141 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2142 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2143 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2144 Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00002145 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002146 } else {
Artem Belevichee7dd122017-03-02 19:14:14 +00002147 if (TM.is64Bit())
2148 Opcode =
2149 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
2150 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
2151 NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
2152 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
2153 else
2154 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
2155 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
2156 NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
2157 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
2158 if (!Opcode)
2159 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002160 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2161 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2162 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2163 Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00002164 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002165 }
2166
Justin Bogner8d83fb62016-05-13 21:12:53 +00002167 if (!NVPTXST)
2168 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002169
Justin Bogner8d83fb62016-05-13 21:12:53 +00002170 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2171 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2172 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2173 ReplaceNode(N, NVPTXST);
2174 return true;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002175}
2176
Justin Bogner8d83fb62016-05-13 21:12:53 +00002177bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002178 SDValue Chain = N->getOperand(0);
2179 SDValue Op1 = N->getOperand(1);
2180 SDValue Addr, Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +00002181 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00002182 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002183 SDNode *ST;
2184 EVT EltVT = Op1.getValueType();
2185 MemSDNode *MemSD = cast<MemSDNode>(N);
2186 EVT StoreVT = MemSD->getMemoryVT();
2187
2188 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00002189 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002190
2191 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2192 report_fatal_error("Cannot store to pointer that points to constant "
2193 "memory space");
2194 }
2195
2196 // Volatile Setting
2197 // - .volatile is only availalble for .global and .shared
2198 bool IsVolatile = MemSD->isVolatile();
2199 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2200 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2201 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2202 IsVolatile = false;
2203
2204 // Type Setting: toType + toTypeWidth
2205 // - for integer type, always use 'u'
2206 assert(StoreVT.isSimple() && "Store value is not simple");
2207 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00002208 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002209 unsigned ToType;
2210 if (ScalarVT.isFloatingPoint())
Artem Belevich620db1f2017-02-23 22:38:24 +00002211 ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
2212 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002213 else
2214 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2215
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002216 SmallVector<SDValue, 12> StOps;
2217 SDValue N2;
2218 unsigned VecType;
2219
2220 switch (N->getOpcode()) {
2221 case NVPTXISD::StoreV2:
2222 VecType = NVPTX::PTXLdStInstCode::V2;
2223 StOps.push_back(N->getOperand(1));
2224 StOps.push_back(N->getOperand(2));
2225 N2 = N->getOperand(3);
2226 break;
2227 case NVPTXISD::StoreV4:
2228 VecType = NVPTX::PTXLdStInstCode::V4;
2229 StOps.push_back(N->getOperand(1));
2230 StOps.push_back(N->getOperand(2));
2231 StOps.push_back(N->getOperand(3));
2232 StOps.push_back(N->getOperand(4));
2233 N2 = N->getOperand(5);
2234 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002235 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002236 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002237 }
2238
Artem Belevich620db1f2017-02-23 22:38:24 +00002239 // v8f16 is a special case. PTX doesn't have st.v8.f16
2240 // instruction. Instead, we split the vector into v2f16 chunks and
2241 // store them with st.v4.b32.
2242 if (EltVT == MVT::v2f16) {
2243 assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
2244 EltVT = MVT::i32;
2245 ToType = NVPTX::PTXLdStInstCode::Untyped;
2246 ToTypeWidth = 32;
2247 }
2248
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002249 StOps.push_back(getI32Imm(IsVolatile, DL));
2250 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2251 StOps.push_back(getI32Imm(VecType, DL));
2252 StOps.push_back(getI32Imm(ToType, DL));
2253 StOps.push_back(getI32Imm(ToTypeWidth, DL));
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002254
2255 if (SelectDirectAddr(N2, Addr)) {
2256 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002257 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002258 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002259 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002260 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
2261 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
2262 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
2263 NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
2264 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002265 break;
2266 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002267 Opcode =
2268 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
2269 NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
2270 NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
2271 NVPTX::STV_f32_v4_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002272 break;
2273 }
2274 StOps.push_back(Addr);
Eric Christopher02389e32015-02-19 00:08:27 +00002275 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2276 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002277 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002278 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002279 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002280 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002281 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
2282 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
2283 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
2284 NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
2285 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002286 break;
2287 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002288 Opcode =
2289 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
2290 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
2291 NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
2292 NVPTX::STV_f32_v4_asi, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002293 break;
2294 }
2295 StOps.push_back(Base);
2296 StOps.push_back(Offset);
Eric Christopher02389e32015-02-19 00:08:27 +00002297 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2298 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2299 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002300 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002301 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002302 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002303 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002304 Opcode = pickOpcodeForVT(
2305 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
2306 NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
2307 NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
2308 NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
2309 NVPTX::STV_f64_v2_ari_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002310 break;
2311 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002312 Opcode = pickOpcodeForVT(
2313 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
2314 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
2315 NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
2316 NVPTX::STV_f32_v4_ari_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002317 break;
2318 }
2319 } else {
2320 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002321 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002322 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002323 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002324 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
2325 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
2326 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
2327 NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
2328 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002329 break;
2330 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002331 Opcode =
2332 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
2333 NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
2334 NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
2335 NVPTX::STV_f32_v4_ari, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002336 break;
2337 }
2338 }
2339 StOps.push_back(Base);
2340 StOps.push_back(Offset);
2341 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00002342 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002343 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002344 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002345 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002346 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002347 Opcode = pickOpcodeForVT(
2348 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
2349 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
2350 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
2351 NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
2352 NVPTX::STV_f64_v2_areg_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002353 break;
2354 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002355 Opcode = pickOpcodeForVT(
2356 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
2357 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
2358 NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
2359 NVPTX::STV_f32_v4_areg_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002360 break;
2361 }
2362 } else {
2363 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002364 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002365 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002366 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002367 Opcode =
2368 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
2369 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2370 NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
2371 NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
2372 NVPTX::STV_f64_v2_areg);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002373 break;
2374 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002375 Opcode =
2376 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
2377 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
2378 NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
2379 NVPTX::STV_f32_v4_areg, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002380 break;
2381 }
2382 }
2383 StOps.push_back(N2);
2384 }
2385
Artem Belevichee7dd122017-03-02 19:14:14 +00002386 if (!Opcode)
2387 return false;
2388
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002389 StOps.push_back(Chain);
2390
Artem Belevichee7dd122017-03-02 19:14:14 +00002391 ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002392
2393 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2394 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2395 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2396
Justin Bogner8d83fb62016-05-13 21:12:53 +00002397 ReplaceNode(N, ST);
2398 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002399}
2400
Justin Bogner8d83fb62016-05-13 21:12:53 +00002401bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002402 SDValue Chain = Node->getOperand(0);
2403 SDValue Offset = Node->getOperand(2);
2404 SDValue Flag = Node->getOperand(3);
2405 SDLoc DL(Node);
2406 MemSDNode *Mem = cast<MemSDNode>(Node);
2407
2408 unsigned VecSize;
2409 switch (Node->getOpcode()) {
2410 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002411 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002412 case NVPTXISD::LoadParam:
2413 VecSize = 1;
2414 break;
2415 case NVPTXISD::LoadParamV2:
2416 VecSize = 2;
2417 break;
2418 case NVPTXISD::LoadParamV4:
2419 VecSize = 4;
2420 break;
2421 }
2422
2423 EVT EltVT = Node->getValueType(0);
2424 EVT MemVT = Mem->getMemoryVT();
2425
Artem Belevichee7dd122017-03-02 19:14:14 +00002426 Optional<unsigned> Opcode;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002427
2428 switch (VecSize) {
2429 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002430 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002431 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002432 Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2433 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2434 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2435 NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2436 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002437 break;
2438 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002439 Opcode =
2440 pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2441 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2442 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2443 NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2444 NVPTX::LoadParamMemV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002445 break;
2446 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002447 Opcode = pickOpcodeForVT(
2448 MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2449 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
2450 NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2451 NVPTX::LoadParamMemV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002452 break;
2453 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002454 if (!Opcode)
2455 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002456
2457 SDVTList VTs;
2458 if (VecSize == 1) {
2459 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2460 } else if (VecSize == 2) {
2461 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2462 } else {
2463 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
Craig Topperabb4ac72014-04-16 06:10:51 +00002464 VTs = CurDAG->getVTList(EVTs);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002465 }
2466
2467 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2468
2469 SmallVector<SDValue, 2> Ops;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002470 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002471 Ops.push_back(Chain);
2472 Ops.push_back(Flag);
2473
Artem Belevichee7dd122017-03-02 19:14:14 +00002474 ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
Justin Bogner8d83fb62016-05-13 21:12:53 +00002475 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002476}
2477
Justin Bogner8d83fb62016-05-13 21:12:53 +00002478bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002479 SDLoc DL(N);
2480 SDValue Chain = N->getOperand(0);
2481 SDValue Offset = N->getOperand(1);
2482 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2483 MemSDNode *Mem = cast<MemSDNode>(N);
2484
2485 // How many elements do we have?
2486 unsigned NumElts = 1;
2487 switch (N->getOpcode()) {
2488 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002489 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002490 case NVPTXISD::StoreRetval:
2491 NumElts = 1;
2492 break;
2493 case NVPTXISD::StoreRetvalV2:
2494 NumElts = 2;
2495 break;
2496 case NVPTXISD::StoreRetvalV4:
2497 NumElts = 4;
2498 break;
2499 }
2500
2501 // Build vector of operands
2502 SmallVector<SDValue, 6> Ops;
2503 for (unsigned i = 0; i < NumElts; ++i)
2504 Ops.push_back(N->getOperand(i + 2));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002505 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002506 Ops.push_back(Chain);
2507
2508 // Determine target opcode
2509 // If we have an i1, use an 8-bit store. The lowering code in
2510 // NVPTXISelLowering will have already emitted an upcast.
Artem Belevichee7dd122017-03-02 19:14:14 +00002511 Optional<unsigned> Opcode = 0;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002512 switch (NumElts) {
2513 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002514 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002515 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002516 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2517 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2518 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2519 NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2520 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002521 break;
2522 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002523 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2524 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2525 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2526 NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2527 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002528 break;
2529 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002530 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2531 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2532 NVPTX::StoreRetvalV4I32, None,
2533 NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2534 NVPTX::StoreRetvalV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002535 break;
2536 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002537 if (!Opcode)
2538 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002539
Artem Belevichee7dd122017-03-02 19:14:14 +00002540 SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002541 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2542 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2543 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2544
Justin Bogner8d83fb62016-05-13 21:12:53 +00002545 ReplaceNode(N, Ret);
2546 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002547}
2548
Justin Bogner8d83fb62016-05-13 21:12:53 +00002549bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002550 SDLoc DL(N);
2551 SDValue Chain = N->getOperand(0);
2552 SDValue Param = N->getOperand(1);
2553 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2554 SDValue Offset = N->getOperand(2);
2555 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2556 MemSDNode *Mem = cast<MemSDNode>(N);
2557 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2558
2559 // How many elements do we have?
2560 unsigned NumElts = 1;
2561 switch (N->getOpcode()) {
2562 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002563 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002564 case NVPTXISD::StoreParamU32:
2565 case NVPTXISD::StoreParamS32:
2566 case NVPTXISD::StoreParam:
2567 NumElts = 1;
2568 break;
2569 case NVPTXISD::StoreParamV2:
2570 NumElts = 2;
2571 break;
2572 case NVPTXISD::StoreParamV4:
2573 NumElts = 4;
2574 break;
2575 }
2576
2577 // Build vector of operands
2578 SmallVector<SDValue, 8> Ops;
2579 for (unsigned i = 0; i < NumElts; ++i)
2580 Ops.push_back(N->getOperand(i + 3));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002581 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2582 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002583 Ops.push_back(Chain);
2584 Ops.push_back(Flag);
2585
2586 // Determine target opcode
2587 // If we have an i1, use an 8-bit store. The lowering code in
2588 // NVPTXISelLowering will have already emitted an upcast.
Artem Belevichee7dd122017-03-02 19:14:14 +00002589 Optional<unsigned> Opcode = 0;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002590 switch (N->getOpcode()) {
2591 default:
2592 switch (NumElts) {
2593 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002594 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002595 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002596 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2597 NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2598 NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2599 NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2600 NVPTX::StoreParamF32, NVPTX::StoreParamF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002601 break;
2602 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002603 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2604 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2605 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2606 NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2607 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002608 break;
2609 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002610 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2611 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2612 NVPTX::StoreParamV4I32, None,
2613 NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2614 NVPTX::StoreParamV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002615 break;
2616 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002617 if (!Opcode)
2618 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002619 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002620 // Special case: if we have a sign-extend/zero-extend node, insert the
2621 // conversion instruction first, and use that as the value operand to
2622 // the selected StoreParam node.
2623 case NVPTXISD::StoreParamU32: {
2624 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002625 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002626 MVT::i32);
2627 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2628 MVT::i32, Ops[0], CvtNone);
2629 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002630 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002631 }
2632 case NVPTXISD::StoreParamS32: {
2633 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002634 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002635 MVT::i32);
2636 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2637 MVT::i32, Ops[0], CvtNone);
2638 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002639 break;
2640 }
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002641 }
Justin Holewinskif8f70912013-06-28 17:57:59 +00002642
Justin Holewinskidff28d22013-07-01 12:59:01 +00002643 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002644 SDNode *Ret =
Artem Belevichee7dd122017-03-02 19:14:14 +00002645 CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002646 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2647 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2648 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2649
Justin Bogner8d83fb62016-05-13 21:12:53 +00002650 ReplaceNode(N, Ret);
2651 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002652}
2653
Justin Bogner8d83fb62016-05-13 21:12:53 +00002654bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00002655 unsigned Opc = 0;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002656
2657 switch (N->getOpcode()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00002658 default: return false;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002659 case NVPTXISD::Tex1DFloatS32:
2660 Opc = NVPTX::TEX_1D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002661 break;
2662 case NVPTXISD::Tex1DFloatFloat:
2663 Opc = NVPTX::TEX_1D_F32_F32;
2664 break;
2665 case NVPTXISD::Tex1DFloatFloatLevel:
2666 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2667 break;
2668 case NVPTXISD::Tex1DFloatFloatGrad:
2669 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2670 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002671 case NVPTXISD::Tex1DS32S32:
2672 Opc = NVPTX::TEX_1D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002673 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002674 case NVPTXISD::Tex1DS32Float:
2675 Opc = NVPTX::TEX_1D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002676 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002677 case NVPTXISD::Tex1DS32FloatLevel:
2678 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002679 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002680 case NVPTXISD::Tex1DS32FloatGrad:
2681 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002682 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002683 case NVPTXISD::Tex1DU32S32:
2684 Opc = NVPTX::TEX_1D_U32_S32;
2685 break;
2686 case NVPTXISD::Tex1DU32Float:
2687 Opc = NVPTX::TEX_1D_U32_F32;
2688 break;
2689 case NVPTXISD::Tex1DU32FloatLevel:
2690 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
2691 break;
2692 case NVPTXISD::Tex1DU32FloatGrad:
2693 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
2694 break;
2695 case NVPTXISD::Tex1DArrayFloatS32:
2696 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002697 break;
2698 case NVPTXISD::Tex1DArrayFloatFloat:
2699 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2700 break;
2701 case NVPTXISD::Tex1DArrayFloatFloatLevel:
2702 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2703 break;
2704 case NVPTXISD::Tex1DArrayFloatFloatGrad:
2705 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2706 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002707 case NVPTXISD::Tex1DArrayS32S32:
2708 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002709 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002710 case NVPTXISD::Tex1DArrayS32Float:
2711 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002712 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002713 case NVPTXISD::Tex1DArrayS32FloatLevel:
2714 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002715 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002716 case NVPTXISD::Tex1DArrayS32FloatGrad:
2717 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002718 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002719 case NVPTXISD::Tex1DArrayU32S32:
2720 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
2721 break;
2722 case NVPTXISD::Tex1DArrayU32Float:
2723 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
2724 break;
2725 case NVPTXISD::Tex1DArrayU32FloatLevel:
2726 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
2727 break;
2728 case NVPTXISD::Tex1DArrayU32FloatGrad:
2729 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
2730 break;
2731 case NVPTXISD::Tex2DFloatS32:
2732 Opc = NVPTX::TEX_2D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002733 break;
2734 case NVPTXISD::Tex2DFloatFloat:
2735 Opc = NVPTX::TEX_2D_F32_F32;
2736 break;
2737 case NVPTXISD::Tex2DFloatFloatLevel:
2738 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2739 break;
2740 case NVPTXISD::Tex2DFloatFloatGrad:
2741 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2742 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002743 case NVPTXISD::Tex2DS32S32:
2744 Opc = NVPTX::TEX_2D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002745 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002746 case NVPTXISD::Tex2DS32Float:
2747 Opc = NVPTX::TEX_2D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002748 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002749 case NVPTXISD::Tex2DS32FloatLevel:
2750 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002751 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002752 case NVPTXISD::Tex2DS32FloatGrad:
2753 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002754 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002755 case NVPTXISD::Tex2DU32S32:
2756 Opc = NVPTX::TEX_2D_U32_S32;
2757 break;
2758 case NVPTXISD::Tex2DU32Float:
2759 Opc = NVPTX::TEX_2D_U32_F32;
2760 break;
2761 case NVPTXISD::Tex2DU32FloatLevel:
2762 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
2763 break;
2764 case NVPTXISD::Tex2DU32FloatGrad:
2765 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
2766 break;
2767 case NVPTXISD::Tex2DArrayFloatS32:
2768 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002769 break;
2770 case NVPTXISD::Tex2DArrayFloatFloat:
2771 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2772 break;
2773 case NVPTXISD::Tex2DArrayFloatFloatLevel:
2774 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2775 break;
2776 case NVPTXISD::Tex2DArrayFloatFloatGrad:
2777 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2778 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002779 case NVPTXISD::Tex2DArrayS32S32:
2780 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002781 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002782 case NVPTXISD::Tex2DArrayS32Float:
2783 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002784 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002785 case NVPTXISD::Tex2DArrayS32FloatLevel:
2786 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002787 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002788 case NVPTXISD::Tex2DArrayS32FloatGrad:
2789 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002790 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002791 case NVPTXISD::Tex2DArrayU32S32:
2792 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
2793 break;
2794 case NVPTXISD::Tex2DArrayU32Float:
2795 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
2796 break;
2797 case NVPTXISD::Tex2DArrayU32FloatLevel:
2798 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
2799 break;
2800 case NVPTXISD::Tex2DArrayU32FloatGrad:
2801 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
2802 break;
2803 case NVPTXISD::Tex3DFloatS32:
2804 Opc = NVPTX::TEX_3D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002805 break;
2806 case NVPTXISD::Tex3DFloatFloat:
2807 Opc = NVPTX::TEX_3D_F32_F32;
2808 break;
2809 case NVPTXISD::Tex3DFloatFloatLevel:
2810 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2811 break;
2812 case NVPTXISD::Tex3DFloatFloatGrad:
2813 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2814 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002815 case NVPTXISD::Tex3DS32S32:
2816 Opc = NVPTX::TEX_3D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002817 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002818 case NVPTXISD::Tex3DS32Float:
2819 Opc = NVPTX::TEX_3D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002820 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002821 case NVPTXISD::Tex3DS32FloatLevel:
2822 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002823 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002824 case NVPTXISD::Tex3DS32FloatGrad:
2825 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
2826 break;
2827 case NVPTXISD::Tex3DU32S32:
2828 Opc = NVPTX::TEX_3D_U32_S32;
2829 break;
2830 case NVPTXISD::Tex3DU32Float:
2831 Opc = NVPTX::TEX_3D_U32_F32;
2832 break;
2833 case NVPTXISD::Tex3DU32FloatLevel:
2834 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
2835 break;
2836 case NVPTXISD::Tex3DU32FloatGrad:
2837 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
2838 break;
2839 case NVPTXISD::TexCubeFloatFloat:
2840 Opc = NVPTX::TEX_CUBE_F32_F32;
2841 break;
2842 case NVPTXISD::TexCubeFloatFloatLevel:
2843 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
2844 break;
2845 case NVPTXISD::TexCubeS32Float:
2846 Opc = NVPTX::TEX_CUBE_S32_F32;
2847 break;
2848 case NVPTXISD::TexCubeS32FloatLevel:
2849 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
2850 break;
2851 case NVPTXISD::TexCubeU32Float:
2852 Opc = NVPTX::TEX_CUBE_U32_F32;
2853 break;
2854 case NVPTXISD::TexCubeU32FloatLevel:
2855 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
2856 break;
2857 case NVPTXISD::TexCubeArrayFloatFloat:
2858 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
2859 break;
2860 case NVPTXISD::TexCubeArrayFloatFloatLevel:
2861 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
2862 break;
2863 case NVPTXISD::TexCubeArrayS32Float:
2864 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
2865 break;
2866 case NVPTXISD::TexCubeArrayS32FloatLevel:
2867 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
2868 break;
2869 case NVPTXISD::TexCubeArrayU32Float:
2870 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
2871 break;
2872 case NVPTXISD::TexCubeArrayU32FloatLevel:
2873 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
2874 break;
2875 case NVPTXISD::Tld4R2DFloatFloat:
2876 Opc = NVPTX::TLD4_R_2D_F32_F32;
2877 break;
2878 case NVPTXISD::Tld4G2DFloatFloat:
2879 Opc = NVPTX::TLD4_G_2D_F32_F32;
2880 break;
2881 case NVPTXISD::Tld4B2DFloatFloat:
2882 Opc = NVPTX::TLD4_B_2D_F32_F32;
2883 break;
2884 case NVPTXISD::Tld4A2DFloatFloat:
2885 Opc = NVPTX::TLD4_A_2D_F32_F32;
2886 break;
2887 case NVPTXISD::Tld4R2DS64Float:
2888 Opc = NVPTX::TLD4_R_2D_S32_F32;
2889 break;
2890 case NVPTXISD::Tld4G2DS64Float:
2891 Opc = NVPTX::TLD4_G_2D_S32_F32;
2892 break;
2893 case NVPTXISD::Tld4B2DS64Float:
2894 Opc = NVPTX::TLD4_B_2D_S32_F32;
2895 break;
2896 case NVPTXISD::Tld4A2DS64Float:
2897 Opc = NVPTX::TLD4_A_2D_S32_F32;
2898 break;
2899 case NVPTXISD::Tld4R2DU64Float:
2900 Opc = NVPTX::TLD4_R_2D_U32_F32;
2901 break;
2902 case NVPTXISD::Tld4G2DU64Float:
2903 Opc = NVPTX::TLD4_G_2D_U32_F32;
2904 break;
2905 case NVPTXISD::Tld4B2DU64Float:
2906 Opc = NVPTX::TLD4_B_2D_U32_F32;
2907 break;
2908 case NVPTXISD::Tld4A2DU64Float:
2909 Opc = NVPTX::TLD4_A_2D_U32_F32;
2910 break;
2911 case NVPTXISD::TexUnified1DFloatS32:
2912 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
2913 break;
2914 case NVPTXISD::TexUnified1DFloatFloat:
2915 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
2916 break;
2917 case NVPTXISD::TexUnified1DFloatFloatLevel:
2918 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
2919 break;
2920 case NVPTXISD::TexUnified1DFloatFloatGrad:
2921 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
2922 break;
2923 case NVPTXISD::TexUnified1DS32S32:
2924 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
2925 break;
2926 case NVPTXISD::TexUnified1DS32Float:
2927 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
2928 break;
2929 case NVPTXISD::TexUnified1DS32FloatLevel:
2930 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
2931 break;
2932 case NVPTXISD::TexUnified1DS32FloatGrad:
2933 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
2934 break;
2935 case NVPTXISD::TexUnified1DU32S32:
2936 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
2937 break;
2938 case NVPTXISD::TexUnified1DU32Float:
2939 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
2940 break;
2941 case NVPTXISD::TexUnified1DU32FloatLevel:
2942 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
2943 break;
2944 case NVPTXISD::TexUnified1DU32FloatGrad:
2945 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
2946 break;
2947 case NVPTXISD::TexUnified1DArrayFloatS32:
2948 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
2949 break;
2950 case NVPTXISD::TexUnified1DArrayFloatFloat:
2951 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
2952 break;
2953 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
2954 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
2955 break;
2956 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
2957 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
2958 break;
2959 case NVPTXISD::TexUnified1DArrayS32S32:
2960 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
2961 break;
2962 case NVPTXISD::TexUnified1DArrayS32Float:
2963 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
2964 break;
2965 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
2966 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
2967 break;
2968 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
2969 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
2970 break;
2971 case NVPTXISD::TexUnified1DArrayU32S32:
2972 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
2973 break;
2974 case NVPTXISD::TexUnified1DArrayU32Float:
2975 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
2976 break;
2977 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
2978 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
2979 break;
2980 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
2981 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
2982 break;
2983 case NVPTXISD::TexUnified2DFloatS32:
2984 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
2985 break;
2986 case NVPTXISD::TexUnified2DFloatFloat:
2987 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
2988 break;
2989 case NVPTXISD::TexUnified2DFloatFloatLevel:
2990 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
2991 break;
2992 case NVPTXISD::TexUnified2DFloatFloatGrad:
2993 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
2994 break;
2995 case NVPTXISD::TexUnified2DS32S32:
2996 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
2997 break;
2998 case NVPTXISD::TexUnified2DS32Float:
2999 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3000 break;
3001 case NVPTXISD::TexUnified2DS32FloatLevel:
3002 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3003 break;
3004 case NVPTXISD::TexUnified2DS32FloatGrad:
3005 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3006 break;
3007 case NVPTXISD::TexUnified2DU32S32:
3008 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3009 break;
3010 case NVPTXISD::TexUnified2DU32Float:
3011 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3012 break;
3013 case NVPTXISD::TexUnified2DU32FloatLevel:
3014 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3015 break;
3016 case NVPTXISD::TexUnified2DU32FloatGrad:
3017 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3018 break;
3019 case NVPTXISD::TexUnified2DArrayFloatS32:
3020 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3021 break;
3022 case NVPTXISD::TexUnified2DArrayFloatFloat:
3023 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3024 break;
3025 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3026 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3027 break;
3028 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3029 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3030 break;
3031 case NVPTXISD::TexUnified2DArrayS32S32:
3032 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3033 break;
3034 case NVPTXISD::TexUnified2DArrayS32Float:
3035 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3036 break;
3037 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3038 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3039 break;
3040 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3041 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3042 break;
3043 case NVPTXISD::TexUnified2DArrayU32S32:
3044 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3045 break;
3046 case NVPTXISD::TexUnified2DArrayU32Float:
3047 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3048 break;
3049 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3050 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3051 break;
3052 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3053 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3054 break;
3055 case NVPTXISD::TexUnified3DFloatS32:
3056 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3057 break;
3058 case NVPTXISD::TexUnified3DFloatFloat:
3059 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3060 break;
3061 case NVPTXISD::TexUnified3DFloatFloatLevel:
3062 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3063 break;
3064 case NVPTXISD::TexUnified3DFloatFloatGrad:
3065 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3066 break;
3067 case NVPTXISD::TexUnified3DS32S32:
3068 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3069 break;
3070 case NVPTXISD::TexUnified3DS32Float:
3071 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3072 break;
3073 case NVPTXISD::TexUnified3DS32FloatLevel:
3074 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3075 break;
3076 case NVPTXISD::TexUnified3DS32FloatGrad:
3077 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3078 break;
3079 case NVPTXISD::TexUnified3DU32S32:
3080 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3081 break;
3082 case NVPTXISD::TexUnified3DU32Float:
3083 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3084 break;
3085 case NVPTXISD::TexUnified3DU32FloatLevel:
3086 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3087 break;
3088 case NVPTXISD::TexUnified3DU32FloatGrad:
3089 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3090 break;
3091 case NVPTXISD::TexUnifiedCubeFloatFloat:
3092 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3093 break;
3094 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3095 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3096 break;
3097 case NVPTXISD::TexUnifiedCubeS32Float:
3098 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3099 break;
3100 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3101 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3102 break;
3103 case NVPTXISD::TexUnifiedCubeU32Float:
3104 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3105 break;
3106 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3107 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3108 break;
3109 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3110 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3111 break;
3112 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3113 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3114 break;
3115 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3116 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3117 break;
3118 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3119 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3120 break;
3121 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3122 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3123 break;
3124 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3125 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3126 break;
3127 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3128 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3129 break;
3130 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3131 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3132 break;
3133 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3134 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3135 break;
3136 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3137 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3138 break;
3139 case NVPTXISD::Tld4UnifiedR2DS64Float:
3140 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3141 break;
3142 case NVPTXISD::Tld4UnifiedG2DS64Float:
3143 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3144 break;
3145 case NVPTXISD::Tld4UnifiedB2DS64Float:
3146 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3147 break;
3148 case NVPTXISD::Tld4UnifiedA2DS64Float:
3149 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3150 break;
3151 case NVPTXISD::Tld4UnifiedR2DU64Float:
3152 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3153 break;
3154 case NVPTXISD::Tld4UnifiedG2DU64Float:
3155 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3156 break;
3157 case NVPTXISD::Tld4UnifiedB2DU64Float:
3158 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3159 break;
3160 case NVPTXISD::Tld4UnifiedA2DU64Float:
3161 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003162 break;
3163 }
3164
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003165 // Copy over operands
Benjamin Kramer806ae442017-08-20 17:30:32 +00003166 SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
3167 Ops.push_back(N->getOperand(0)); // Move chain to the back.
Justin Holewinski30d56a72014-04-09 15:39:15 +00003168
Justin Bogner8d83fb62016-05-13 21:12:53 +00003169 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3170 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003171}
3172
Justin Bogner8d83fb62016-05-13 21:12:53 +00003173bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00003174 unsigned Opc = 0;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003175 switch (N->getOpcode()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00003176 default: return false;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003177 case NVPTXISD::Suld1DI8Clamp:
3178 Opc = NVPTX::SULD_1D_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003179 break;
3180 case NVPTXISD::Suld1DI16Clamp:
3181 Opc = NVPTX::SULD_1D_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003182 break;
3183 case NVPTXISD::Suld1DI32Clamp:
3184 Opc = NVPTX::SULD_1D_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003185 break;
3186 case NVPTXISD::Suld1DI64Clamp:
3187 Opc = NVPTX::SULD_1D_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003188 break;
3189 case NVPTXISD::Suld1DV2I8Clamp:
3190 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003191 break;
3192 case NVPTXISD::Suld1DV2I16Clamp:
3193 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003194 break;
3195 case NVPTXISD::Suld1DV2I32Clamp:
3196 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003197 break;
3198 case NVPTXISD::Suld1DV2I64Clamp:
3199 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003200 break;
3201 case NVPTXISD::Suld1DV4I8Clamp:
3202 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003203 break;
3204 case NVPTXISD::Suld1DV4I16Clamp:
3205 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003206 break;
3207 case NVPTXISD::Suld1DV4I32Clamp:
3208 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003209 break;
3210 case NVPTXISD::Suld1DArrayI8Clamp:
3211 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003212 break;
3213 case NVPTXISD::Suld1DArrayI16Clamp:
3214 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003215 break;
3216 case NVPTXISD::Suld1DArrayI32Clamp:
3217 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003218 break;
3219 case NVPTXISD::Suld1DArrayI64Clamp:
3220 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003221 break;
3222 case NVPTXISD::Suld1DArrayV2I8Clamp:
3223 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003224 break;
3225 case NVPTXISD::Suld1DArrayV2I16Clamp:
3226 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003227 break;
3228 case NVPTXISD::Suld1DArrayV2I32Clamp:
3229 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003230 break;
3231 case NVPTXISD::Suld1DArrayV2I64Clamp:
3232 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003233 break;
3234 case NVPTXISD::Suld1DArrayV4I8Clamp:
3235 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003236 break;
3237 case NVPTXISD::Suld1DArrayV4I16Clamp:
3238 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003239 break;
3240 case NVPTXISD::Suld1DArrayV4I32Clamp:
3241 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003242 break;
3243 case NVPTXISD::Suld2DI8Clamp:
3244 Opc = NVPTX::SULD_2D_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003245 break;
3246 case NVPTXISD::Suld2DI16Clamp:
3247 Opc = NVPTX::SULD_2D_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003248 break;
3249 case NVPTXISD::Suld2DI32Clamp:
3250 Opc = NVPTX::SULD_2D_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003251 break;
3252 case NVPTXISD::Suld2DI64Clamp:
3253 Opc = NVPTX::SULD_2D_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003254 break;
3255 case NVPTXISD::Suld2DV2I8Clamp:
3256 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003257 break;
3258 case NVPTXISD::Suld2DV2I16Clamp:
3259 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003260 break;
3261 case NVPTXISD::Suld2DV2I32Clamp:
3262 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003263 break;
3264 case NVPTXISD::Suld2DV2I64Clamp:
3265 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003266 break;
3267 case NVPTXISD::Suld2DV4I8Clamp:
3268 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003269 break;
3270 case NVPTXISD::Suld2DV4I16Clamp:
3271 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003272 break;
3273 case NVPTXISD::Suld2DV4I32Clamp:
3274 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003275 break;
3276 case NVPTXISD::Suld2DArrayI8Clamp:
3277 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003278 break;
3279 case NVPTXISD::Suld2DArrayI16Clamp:
3280 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003281 break;
3282 case NVPTXISD::Suld2DArrayI32Clamp:
3283 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003284 break;
3285 case NVPTXISD::Suld2DArrayI64Clamp:
3286 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003287 break;
3288 case NVPTXISD::Suld2DArrayV2I8Clamp:
3289 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003290 break;
3291 case NVPTXISD::Suld2DArrayV2I16Clamp:
3292 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003293 break;
3294 case NVPTXISD::Suld2DArrayV2I32Clamp:
3295 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003296 break;
3297 case NVPTXISD::Suld2DArrayV2I64Clamp:
3298 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003299 break;
3300 case NVPTXISD::Suld2DArrayV4I8Clamp:
3301 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003302 break;
3303 case NVPTXISD::Suld2DArrayV4I16Clamp:
3304 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003305 break;
3306 case NVPTXISD::Suld2DArrayV4I32Clamp:
3307 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003308 break;
3309 case NVPTXISD::Suld3DI8Clamp:
3310 Opc = NVPTX::SULD_3D_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003311 break;
3312 case NVPTXISD::Suld3DI16Clamp:
3313 Opc = NVPTX::SULD_3D_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003314 break;
3315 case NVPTXISD::Suld3DI32Clamp:
3316 Opc = NVPTX::SULD_3D_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003317 break;
3318 case NVPTXISD::Suld3DI64Clamp:
3319 Opc = NVPTX::SULD_3D_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003320 break;
3321 case NVPTXISD::Suld3DV2I8Clamp:
3322 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003323 break;
3324 case NVPTXISD::Suld3DV2I16Clamp:
3325 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003326 break;
3327 case NVPTXISD::Suld3DV2I32Clamp:
3328 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003329 break;
3330 case NVPTXISD::Suld3DV2I64Clamp:
3331 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003332 break;
3333 case NVPTXISD::Suld3DV4I8Clamp:
3334 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003335 break;
3336 case NVPTXISD::Suld3DV4I16Clamp:
3337 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003338 break;
3339 case NVPTXISD::Suld3DV4I32Clamp:
3340 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003341 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003342 case NVPTXISD::Suld1DI8Trap:
3343 Opc = NVPTX::SULD_1D_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003344 break;
3345 case NVPTXISD::Suld1DI16Trap:
3346 Opc = NVPTX::SULD_1D_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003347 break;
3348 case NVPTXISD::Suld1DI32Trap:
3349 Opc = NVPTX::SULD_1D_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003350 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003351 case NVPTXISD::Suld1DI64Trap:
3352 Opc = NVPTX::SULD_1D_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003353 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003354 case NVPTXISD::Suld1DV2I8Trap:
3355 Opc = NVPTX::SULD_1D_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003356 break;
3357 case NVPTXISD::Suld1DV2I16Trap:
3358 Opc = NVPTX::SULD_1D_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003359 break;
3360 case NVPTXISD::Suld1DV2I32Trap:
3361 Opc = NVPTX::SULD_1D_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003362 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003363 case NVPTXISD::Suld1DV2I64Trap:
3364 Opc = NVPTX::SULD_1D_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003365 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003366 case NVPTXISD::Suld1DV4I8Trap:
3367 Opc = NVPTX::SULD_1D_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003368 break;
3369 case NVPTXISD::Suld1DV4I16Trap:
3370 Opc = NVPTX::SULD_1D_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003371 break;
3372 case NVPTXISD::Suld1DV4I32Trap:
3373 Opc = NVPTX::SULD_1D_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003374 break;
3375 case NVPTXISD::Suld1DArrayI8Trap:
3376 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003377 break;
3378 case NVPTXISD::Suld1DArrayI16Trap:
3379 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003380 break;
3381 case NVPTXISD::Suld1DArrayI32Trap:
3382 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003383 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003384 case NVPTXISD::Suld1DArrayI64Trap:
3385 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003386 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003387 case NVPTXISD::Suld1DArrayV2I8Trap:
3388 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003389 break;
3390 case NVPTXISD::Suld1DArrayV2I16Trap:
3391 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003392 break;
3393 case NVPTXISD::Suld1DArrayV2I32Trap:
3394 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003395 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003396 case NVPTXISD::Suld1DArrayV2I64Trap:
3397 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003398 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003399 case NVPTXISD::Suld1DArrayV4I8Trap:
3400 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003401 break;
3402 case NVPTXISD::Suld1DArrayV4I16Trap:
3403 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003404 break;
3405 case NVPTXISD::Suld1DArrayV4I32Trap:
3406 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003407 break;
3408 case NVPTXISD::Suld2DI8Trap:
3409 Opc = NVPTX::SULD_2D_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003410 break;
3411 case NVPTXISD::Suld2DI16Trap:
3412 Opc = NVPTX::SULD_2D_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003413 break;
3414 case NVPTXISD::Suld2DI32Trap:
3415 Opc = NVPTX::SULD_2D_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003416 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003417 case NVPTXISD::Suld2DI64Trap:
3418 Opc = NVPTX::SULD_2D_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003419 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003420 case NVPTXISD::Suld2DV2I8Trap:
3421 Opc = NVPTX::SULD_2D_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003422 break;
3423 case NVPTXISD::Suld2DV2I16Trap:
3424 Opc = NVPTX::SULD_2D_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003425 break;
3426 case NVPTXISD::Suld2DV2I32Trap:
3427 Opc = NVPTX::SULD_2D_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003428 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003429 case NVPTXISD::Suld2DV2I64Trap:
3430 Opc = NVPTX::SULD_2D_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003431 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003432 case NVPTXISD::Suld2DV4I8Trap:
3433 Opc = NVPTX::SULD_2D_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003434 break;
3435 case NVPTXISD::Suld2DV4I16Trap:
3436 Opc = NVPTX::SULD_2D_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003437 break;
3438 case NVPTXISD::Suld2DV4I32Trap:
3439 Opc = NVPTX::SULD_2D_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003440 break;
3441 case NVPTXISD::Suld2DArrayI8Trap:
3442 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003443 break;
3444 case NVPTXISD::Suld2DArrayI16Trap:
3445 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003446 break;
3447 case NVPTXISD::Suld2DArrayI32Trap:
3448 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003449 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003450 case NVPTXISD::Suld2DArrayI64Trap:
3451 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003452 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003453 case NVPTXISD::Suld2DArrayV2I8Trap:
3454 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003455 break;
3456 case NVPTXISD::Suld2DArrayV2I16Trap:
3457 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003458 break;
3459 case NVPTXISD::Suld2DArrayV2I32Trap:
3460 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003461 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003462 case NVPTXISD::Suld2DArrayV2I64Trap:
3463 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003464 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003465 case NVPTXISD::Suld2DArrayV4I8Trap:
3466 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003467 break;
3468 case NVPTXISD::Suld2DArrayV4I16Trap:
3469 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003470 break;
3471 case NVPTXISD::Suld2DArrayV4I32Trap:
3472 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003473 break;
3474 case NVPTXISD::Suld3DI8Trap:
3475 Opc = NVPTX::SULD_3D_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003476 break;
3477 case NVPTXISD::Suld3DI16Trap:
3478 Opc = NVPTX::SULD_3D_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003479 break;
3480 case NVPTXISD::Suld3DI32Trap:
3481 Opc = NVPTX::SULD_3D_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003482 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003483 case NVPTXISD::Suld3DI64Trap:
3484 Opc = NVPTX::SULD_3D_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003485 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003486 case NVPTXISD::Suld3DV2I8Trap:
3487 Opc = NVPTX::SULD_3D_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003488 break;
3489 case NVPTXISD::Suld3DV2I16Trap:
3490 Opc = NVPTX::SULD_3D_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003491 break;
3492 case NVPTXISD::Suld3DV2I32Trap:
3493 Opc = NVPTX::SULD_3D_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003494 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003495 case NVPTXISD::Suld3DV2I64Trap:
3496 Opc = NVPTX::SULD_3D_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003497 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003498 case NVPTXISD::Suld3DV4I8Trap:
3499 Opc = NVPTX::SULD_3D_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003500 break;
3501 case NVPTXISD::Suld3DV4I16Trap:
3502 Opc = NVPTX::SULD_3D_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003503 break;
3504 case NVPTXISD::Suld3DV4I32Trap:
3505 Opc = NVPTX::SULD_3D_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003506 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003507 case NVPTXISD::Suld1DI8Zero:
3508 Opc = NVPTX::SULD_1D_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003509 break;
3510 case NVPTXISD::Suld1DI16Zero:
3511 Opc = NVPTX::SULD_1D_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003512 break;
3513 case NVPTXISD::Suld1DI32Zero:
3514 Opc = NVPTX::SULD_1D_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003515 break;
3516 case NVPTXISD::Suld1DI64Zero:
3517 Opc = NVPTX::SULD_1D_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003518 break;
3519 case NVPTXISD::Suld1DV2I8Zero:
3520 Opc = NVPTX::SULD_1D_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003521 break;
3522 case NVPTXISD::Suld1DV2I16Zero:
3523 Opc = NVPTX::SULD_1D_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003524 break;
3525 case NVPTXISD::Suld1DV2I32Zero:
3526 Opc = NVPTX::SULD_1D_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003527 break;
3528 case NVPTXISD::Suld1DV2I64Zero:
3529 Opc = NVPTX::SULD_1D_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003530 break;
3531 case NVPTXISD::Suld1DV4I8Zero:
3532 Opc = NVPTX::SULD_1D_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003533 break;
3534 case NVPTXISD::Suld1DV4I16Zero:
3535 Opc = NVPTX::SULD_1D_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003536 break;
3537 case NVPTXISD::Suld1DV4I32Zero:
3538 Opc = NVPTX::SULD_1D_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003539 break;
3540 case NVPTXISD::Suld1DArrayI8Zero:
3541 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003542 break;
3543 case NVPTXISD::Suld1DArrayI16Zero:
3544 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003545 break;
3546 case NVPTXISD::Suld1DArrayI32Zero:
3547 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003548 break;
3549 case NVPTXISD::Suld1DArrayI64Zero:
3550 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003551 break;
3552 case NVPTXISD::Suld1DArrayV2I8Zero:
3553 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003554 break;
3555 case NVPTXISD::Suld1DArrayV2I16Zero:
3556 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003557 break;
3558 case NVPTXISD::Suld1DArrayV2I32Zero:
3559 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003560 break;
3561 case NVPTXISD::Suld1DArrayV2I64Zero:
3562 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003563 break;
3564 case NVPTXISD::Suld1DArrayV4I8Zero:
3565 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003566 break;
3567 case NVPTXISD::Suld1DArrayV4I16Zero:
3568 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003569 break;
3570 case NVPTXISD::Suld1DArrayV4I32Zero:
3571 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003572 break;
3573 case NVPTXISD::Suld2DI8Zero:
3574 Opc = NVPTX::SULD_2D_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003575 break;
3576 case NVPTXISD::Suld2DI16Zero:
3577 Opc = NVPTX::SULD_2D_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003578 break;
3579 case NVPTXISD::Suld2DI32Zero:
3580 Opc = NVPTX::SULD_2D_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003581 break;
3582 case NVPTXISD::Suld2DI64Zero:
3583 Opc = NVPTX::SULD_2D_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003584 break;
3585 case NVPTXISD::Suld2DV2I8Zero:
3586 Opc = NVPTX::SULD_2D_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003587 break;
3588 case NVPTXISD::Suld2DV2I16Zero:
3589 Opc = NVPTX::SULD_2D_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003590 break;
3591 case NVPTXISD::Suld2DV2I32Zero:
3592 Opc = NVPTX::SULD_2D_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003593 break;
3594 case NVPTXISD::Suld2DV2I64Zero:
3595 Opc = NVPTX::SULD_2D_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003596 break;
3597 case NVPTXISD::Suld2DV4I8Zero:
3598 Opc = NVPTX::SULD_2D_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003599 break;
3600 case NVPTXISD::Suld2DV4I16Zero:
3601 Opc = NVPTX::SULD_2D_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003602 break;
3603 case NVPTXISD::Suld2DV4I32Zero:
3604 Opc = NVPTX::SULD_2D_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003605 break;
3606 case NVPTXISD::Suld2DArrayI8Zero:
3607 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003608 break;
3609 case NVPTXISD::Suld2DArrayI16Zero:
3610 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003611 break;
3612 case NVPTXISD::Suld2DArrayI32Zero:
3613 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003614 break;
3615 case NVPTXISD::Suld2DArrayI64Zero:
3616 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003617 break;
3618 case NVPTXISD::Suld2DArrayV2I8Zero:
3619 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003620 break;
3621 case NVPTXISD::Suld2DArrayV2I16Zero:
3622 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003623 break;
3624 case NVPTXISD::Suld2DArrayV2I32Zero:
3625 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003626 break;
3627 case NVPTXISD::Suld2DArrayV2I64Zero:
3628 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003629 break;
3630 case NVPTXISD::Suld2DArrayV4I8Zero:
3631 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003632 break;
3633 case NVPTXISD::Suld2DArrayV4I16Zero:
3634 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003635 break;
3636 case NVPTXISD::Suld2DArrayV4I32Zero:
3637 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003638 break;
3639 case NVPTXISD::Suld3DI8Zero:
3640 Opc = NVPTX::SULD_3D_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003641 break;
3642 case NVPTXISD::Suld3DI16Zero:
3643 Opc = NVPTX::SULD_3D_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003644 break;
3645 case NVPTXISD::Suld3DI32Zero:
3646 Opc = NVPTX::SULD_3D_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003647 break;
3648 case NVPTXISD::Suld3DI64Zero:
3649 Opc = NVPTX::SULD_3D_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003650 break;
3651 case NVPTXISD::Suld3DV2I8Zero:
3652 Opc = NVPTX::SULD_3D_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003653 break;
3654 case NVPTXISD::Suld3DV2I16Zero:
3655 Opc = NVPTX::SULD_3D_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003656 break;
3657 case NVPTXISD::Suld3DV2I32Zero:
3658 Opc = NVPTX::SULD_3D_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003659 break;
3660 case NVPTXISD::Suld3DV2I64Zero:
3661 Opc = NVPTX::SULD_3D_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003662 break;
3663 case NVPTXISD::Suld3DV4I8Zero:
3664 Opc = NVPTX::SULD_3D_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003665 break;
3666 case NVPTXISD::Suld3DV4I16Zero:
3667 Opc = NVPTX::SULD_3D_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003668 break;
3669 case NVPTXISD::Suld3DV4I32Zero:
3670 Opc = NVPTX::SULD_3D_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003671 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003672 }
Benjamin Kramer806ae442017-08-20 17:30:32 +00003673
3674 // Copy over operands
3675 SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
3676 Ops.push_back(N->getOperand(0)); // Move chain to the back.
3677
Justin Bogner8d83fb62016-05-13 21:12:53 +00003678 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3679 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003680}
3681
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003682
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003683/// SelectBFE - Look for instruction sequences that can be made more efficient
3684/// by using the 'bfe' (bit-field extract) PTX instruction
Justin Bogner8d83fb62016-05-13 21:12:53 +00003685bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003686 SDLoc DL(N);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003687 SDValue LHS = N->getOperand(0);
3688 SDValue RHS = N->getOperand(1);
3689 SDValue Len;
3690 SDValue Start;
3691 SDValue Val;
3692 bool IsSigned = false;
3693
3694 if (N->getOpcode() == ISD::AND) {
3695 // Canonicalize the operands
3696 // We want 'and %val, %mask'
3697 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3698 std::swap(LHS, RHS);
3699 }
3700
3701 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3702 if (!Mask) {
3703 // We need a constant mask on the RHS of the AND
Justin Bogner8d83fb62016-05-13 21:12:53 +00003704 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003705 }
3706
3707 // Extract the mask bits
3708 uint64_t MaskVal = Mask->getZExtValue();
3709 if (!isMask_64(MaskVal)) {
3710 // We *could* handle shifted masks here, but doing so would require an
3711 // 'and' operation to fix up the low-order bits so we would trade
3712 // shr+and for bfe+and, which has the same throughput
Justin Bogner8d83fb62016-05-13 21:12:53 +00003713 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003714 }
3715
3716 // How many bits are in our mask?
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00003717 uint64_t NumBits = countTrailingOnes(MaskVal);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003718 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003719
3720 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3721 // We have a 'srl/and' pair, extract the effective start bit and length
3722 Val = LHS.getNode()->getOperand(0);
3723 Start = LHS.getNode()->getOperand(1);
3724 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3725 if (StartConst) {
3726 uint64_t StartVal = StartConst->getZExtValue();
3727 // How many "good" bits do we have left? "good" is defined here as bits
3728 // that exist in the original value, not shifted in.
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00003729 uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003730 if (NumBits > GoodBits) {
3731 // Do not handle the case where bits have been shifted in. In theory
3732 // we could handle this, but the cost is likely higher than just
3733 // emitting the srl/and pair.
Justin Bogner8d83fb62016-05-13 21:12:53 +00003734 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003735 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003736 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003737 } else {
3738 // Do not handle the case where the shift amount (can be zero if no srl
3739 // was found) is not constant. We could handle this case, but it would
3740 // require run-time logic that would be more expensive than just
3741 // emitting the srl/and pair.
Justin Bogner8d83fb62016-05-13 21:12:53 +00003742 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003743 }
3744 } else {
3745 // Do not handle the case where the LHS of the and is not a shift. While
3746 // it would be trivial to handle this case, it would just transform
3747 // 'and' -> 'bfe', but 'and' has higher-throughput.
Justin Bogner8d83fb62016-05-13 21:12:53 +00003748 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003749 }
3750 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3751 if (LHS->getOpcode() == ISD::AND) {
3752 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3753 if (!ShiftCnst) {
3754 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003755 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003756 }
3757
3758 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3759
3760 SDValue AndLHS = LHS->getOperand(0);
3761 SDValue AndRHS = LHS->getOperand(1);
3762
3763 // Canonicalize the AND to have the mask on the RHS
3764 if (isa<ConstantSDNode>(AndLHS)) {
3765 std::swap(AndLHS, AndRHS);
3766 }
3767
3768 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3769 if (!MaskCnst) {
3770 // Mask must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003771 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003772 }
3773
3774 uint64_t MaskVal = MaskCnst->getZExtValue();
3775 uint64_t NumZeros;
3776 uint64_t NumBits;
3777 if (isMask_64(MaskVal)) {
3778 NumZeros = 0;
3779 // The number of bits in the result bitfield will be the number of
3780 // trailing ones (the AND) minus the number of bits we shift off
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00003781 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003782 } else if (isShiftedMask_64(MaskVal)) {
3783 NumZeros = countTrailingZeros(MaskVal);
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00003784 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003785 // The number of bits in the result bitfield will be the number of
3786 // trailing zeros plus the number of set bits in the mask minus the
3787 // number of bits we shift off
3788 NumBits = NumZeros + NumOnes - ShiftAmt;
3789 } else {
3790 // This is not a mask we can handle
Justin Bogner8d83fb62016-05-13 21:12:53 +00003791 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003792 }
3793
3794 if (ShiftAmt < NumZeros) {
3795 // Handling this case would require extra logic that would make this
3796 // transformation non-profitable
Justin Bogner8d83fb62016-05-13 21:12:53 +00003797 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003798 }
3799
3800 Val = AndLHS;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003801 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3802 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003803 } else if (LHS->getOpcode() == ISD::SHL) {
3804 // Here, we have a pattern like:
3805 //
3806 // (sra (shl val, NN), MM)
3807 // or
3808 // (srl (shl val, NN), MM)
3809 //
3810 // If MM >= NN, we can efficiently optimize this with bfe
3811 Val = LHS->getOperand(0);
3812
3813 SDValue ShlRHS = LHS->getOperand(1);
3814 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3815 if (!ShlCnst) {
3816 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003817 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003818 }
3819 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3820
3821 SDValue ShrRHS = RHS;
3822 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3823 if (!ShrCnst) {
3824 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003825 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003826 }
3827 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3828
3829 // To avoid extra codegen and be profitable, we need Outer >= Inner
3830 if (OuterShiftAmt < InnerShiftAmt) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00003831 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003832 }
3833
3834 // If the outer shift is more than the type size, we have no bitfield to
3835 // extract (since we also check that the inner shift is <= the outer shift
3836 // then this also implies that the inner shift is < the type size)
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00003837 if (OuterShiftAmt >= Val.getValueSizeInBits()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00003838 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003839 }
3840
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00003841 Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3842 MVT::i32);
3843 Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3844 DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003845
3846 if (N->getOpcode() == ISD::SRA) {
3847 // If we have a arithmetic right shift, we need to use the signed bfe
3848 // variant
3849 IsSigned = true;
3850 }
3851 } else {
3852 // No can do...
Justin Bogner8d83fb62016-05-13 21:12:53 +00003853 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003854 }
3855 } else {
3856 // No can do...
Justin Bogner8d83fb62016-05-13 21:12:53 +00003857 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003858 }
3859
3860
3861 unsigned Opc;
3862 // For the BFE operations we form here from "and" and "srl", always use the
3863 // unsigned variants.
3864 if (Val.getValueType() == MVT::i32) {
3865 if (IsSigned) {
3866 Opc = NVPTX::BFE_S32rii;
3867 } else {
3868 Opc = NVPTX::BFE_U32rii;
3869 }
3870 } else if (Val.getValueType() == MVT::i64) {
3871 if (IsSigned) {
3872 Opc = NVPTX::BFE_S64rii;
3873 } else {
3874 Opc = NVPTX::BFE_U64rii;
3875 }
3876 } else {
3877 // We cannot handle this type
Justin Bogner8d83fb62016-05-13 21:12:53 +00003878 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003879 }
3880
3881 SDValue Ops[] = {
3882 Val, Start, Len
3883 };
3884
Justin Bogner8d83fb62016-05-13 21:12:53 +00003885 ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3886 return true;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003887}
3888
Justin Holewinskiae556d32012-05-04 20:18:50 +00003889// SelectDirectAddr - Match a direct address for DAG.
3890// A direct address could be a globaladdress or externalsymbol.
3891bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3892 // Return true if TGA or ES.
Justin Holewinski0497ab12013-03-30 14:29:21 +00003893 if (N.getOpcode() == ISD::TargetGlobalAddress ||
3894 N.getOpcode() == ISD::TargetExternalSymbol) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003895 Address = N;
3896 return true;
3897 }
3898 if (N.getOpcode() == NVPTXISD::Wrapper) {
3899 Address = N.getOperand(0);
3900 return true;
3901 }
Artem Belevichb2e76a52016-07-20 18:39:47 +00003902 // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3903 if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3904 if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3905 CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
3906 CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3907 return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003908 }
3909 return false;
3910}
3911
3912// symbol+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00003913bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3914 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003915 if (Addr.getOpcode() == ISD::ADD) {
3916 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00003917 SDValue base = Addr.getOperand(0);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003918 if (SelectDirectAddr(base, Base)) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003919 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3920 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003921 return true;
3922 }
3923 }
3924 }
3925 return false;
3926}
3927
3928// symbol+offset
3929bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3930 SDValue &Base, SDValue &Offset) {
3931 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3932}
3933
3934// symbol+offset
3935bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3936 SDValue &Base, SDValue &Offset) {
3937 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3938}
3939
3940// register+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00003941bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3942 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003943 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3944 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003945 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003946 return true;
3947 }
3948 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3949 Addr.getOpcode() == ISD::TargetGlobalAddress)
Justin Holewinski0497ab12013-03-30 14:29:21 +00003950 return false; // direct calls.
Justin Holewinskiae556d32012-05-04 20:18:50 +00003951
3952 if (Addr.getOpcode() == ISD::ADD) {
3953 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3954 return false;
3955 }
3956 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3957 if (FrameIndexSDNode *FIN =
Justin Holewinski0497ab12013-03-30 14:29:21 +00003958 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
Justin Holewinskiae556d32012-05-04 20:18:50 +00003959 // Constant offset from frame ref.
3960 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3961 else
3962 Base = Addr.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003963 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3964 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003965 return true;
3966 }
3967 }
3968 return false;
3969}
3970
3971// register+offset
3972bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3973 SDValue &Base, SDValue &Offset) {
3974 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3975}
3976
3977// register+offset
3978bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3979 SDValue &Base, SDValue &Offset) {
3980 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3981}
3982
3983bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3984 unsigned int spN) const {
Craig Topper062a2ba2014-04-25 05:30:21 +00003985 const Value *Src = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00003986 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +00003987 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3988 return true;
3989 Src = mN->getMemOperand()->getValue();
Justin Holewinskiae556d32012-05-04 20:18:50 +00003990 }
3991 if (!Src)
3992 return false;
Craig Toppere3dcce92015-08-01 22:20:21 +00003993 if (auto *PT = dyn_cast<PointerType>(Src->getType()))
Justin Holewinskiae556d32012-05-04 20:18:50 +00003994 return (PT->getAddressSpace() == spN);
3995 return false;
3996}
3997
3998/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3999/// inline asm expressions.
Justin Holewinski0497ab12013-03-30 14:29:21 +00004000bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
Daniel Sanders60f1db02015-03-13 12:45:09 +00004001 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00004002 SDValue Op0, Op1;
Daniel Sanders60f1db02015-03-13 12:45:09 +00004003 switch (ConstraintID) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00004004 default:
4005 return true;
Daniel Sanders60f1db02015-03-13 12:45:09 +00004006 case InlineAsm::Constraint_m: // memory
Justin Holewinskiae556d32012-05-04 20:18:50 +00004007 if (SelectDirectAddr(Op, Op0)) {
4008 OutOps.push_back(Op0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004009 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
Justin Holewinskiae556d32012-05-04 20:18:50 +00004010 return false;
4011 }
4012 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
4013 OutOps.push_back(Op0);
4014 OutOps.push_back(Op1);
4015 return false;
4016 }
4017 break;
4018 }
4019 return true;
4020}
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00004021
4022/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
4023/// conversion from \p SrcTy to \p DestTy.
4024unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
4025 bool IsSigned) {
4026 switch (SrcTy.SimpleTy) {
4027 default:
4028 llvm_unreachable("Unhandled source type");
4029 case MVT::i8:
4030 switch (DestTy.SimpleTy) {
4031 default:
4032 llvm_unreachable("Unhandled dest type");
4033 case MVT::i16:
4034 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
4035 case MVT::i32:
4036 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
4037 case MVT::i64:
4038 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
4039 }
4040 case MVT::i16:
4041 switch (DestTy.SimpleTy) {
4042 default:
4043 llvm_unreachable("Unhandled dest type");
4044 case MVT::i8:
4045 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
4046 case MVT::i32:
4047 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
4048 case MVT::i64:
4049 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
4050 }
4051 case MVT::i32:
4052 switch (DestTy.SimpleTy) {
4053 default:
4054 llvm_unreachable("Unhandled dest type");
4055 case MVT::i8:
4056 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
4057 case MVT::i16:
4058 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
4059 case MVT::i64:
4060 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
4061 }
4062 case MVT::i64:
4063 switch (DestTy.SimpleTy) {
4064 default:
4065 llvm_unreachable("Unhandled dest type");
4066 case MVT::i8:
4067 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
4068 case MVT::i16:
4069 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
4070 case MVT::i32:
4071 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
4072 }
4073 }
4074}
Artem Belevich3bafc2f2017-10-12 18:27:55 +00004075
4076bool NVPTXDAGToDAGISel::tryWMMA_LDST(SDNode *N) {
4077 SDValue Chain = N->getOperand(0);
4078 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
4079 SDValue Op1 = N->getOperand(2);
4080 SDValue Addr, Offset, Base;
4081 Optional<unsigned> Opcode;
4082 SDLoc DL(N);
4083 MemSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
4084 WmmaVariant Variant;
4085 SmallVector<SDValue, 12> Ops;
4086 bool isStore = N->getNumValues() == 1; // Store ops only return a chain.
4087
4088 if (SelectDirectAddr(Op1, Addr)) {
4089 Variant = WMMA_VARIANT_AVAR;
4090 Ops.push_back(Addr);
4091 } else if (SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) ||
4092 SelectADDRri64(Op1.getNode(), Op1, Base, Offset)) {
4093 Variant = WMMA_VARIANT_ARI64;
4094 Ops.push_back(Base);
4095 Ops.push_back(Offset);
4096 } else {
4097 Variant = WMMA_VARIANT_AVAR;
4098 Ops.push_back(Op1);
4099 }
4100 unsigned NumOps = N->getNumOperands();
4101 // Pass through the rest of the operands to the machine node.
4102 for (unsigned i = 3; i < NumOps; ++i)
4103 Ops.push_back(N->getOperand(i));
4104 Ops.push_back(Chain);
4105
4106 Opcode = getWmmaLdStOpcode(IID, Variant);
4107 if (!Opcode) {
4108 llvm::errs() << "tryWMMALD - no Opcode.\n";
4109 return false;
4110 }
4111
4112 EVT MemVT = MemSD->getMemoryVT();
4113 assert(MemVT.isVector() && "Expected vector return type.");
4114
4115 SDNode *MN;
4116 if (isStore) {
4117 MN = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
4118 } else {
4119 SmallVector<EVT, 9> InstVTs(MemVT.getVectorNumElements(),
4120 MemSD->getValueType(0));
4121 InstVTs.push_back(MVT::Other);
4122 MN = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTs, Ops);
4123 }
4124
4125 ReplaceNode(N, MN);
4126 return true;
4127}
4128
4129bool NVPTXDAGToDAGISel::tryWMMA_MMA(SDNode *N) {
4130 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
4131 SDLoc DL(N);
4132 unsigned Opc;
4133
4134 switch (IID) {
4135 default:
4136 return false;
4137 case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f16:
4138 Opc = NVPTX::INT_WMMA_MMA_col_col_f16_f16;
4139 break;
4140 case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f16_satfinite:
4141 Opc = NVPTX::INT_WMMA_MMA_col_col_f16_f16_satfinite;
4142 break;
4143 case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f32:
4144 Opc = NVPTX::INT_WMMA_MMA_col_col_f16_f32;
4145 break;
4146 case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f32_satfinite:
4147 Opc = NVPTX::INT_WMMA_MMA_col_col_f16_f32_satfinite;
4148 break;
4149 case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f16:
4150 Opc = NVPTX::INT_WMMA_MMA_col_col_f32_f16;
4151 break;
4152 case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f16_satfinite:
4153 Opc = NVPTX::INT_WMMA_MMA_col_col_f32_f16_satfinite;
4154 break;
4155 case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f32:
4156 Opc = NVPTX::INT_WMMA_MMA_col_col_f32_f32;
4157 break;
4158 case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f32_satfinite:
4159 Opc = NVPTX::INT_WMMA_MMA_col_col_f32_f32_satfinite;
4160 break;
4161 case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f16:
4162 Opc = NVPTX::INT_WMMA_MMA_col_row_f16_f16;
4163 break;
4164 case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f16_satfinite:
4165 Opc = NVPTX::INT_WMMA_MMA_col_row_f16_f16_satfinite;
4166 break;
4167 case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f32:
4168 Opc = NVPTX::INT_WMMA_MMA_col_row_f16_f32;
4169 break;
4170 case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f32_satfinite:
4171 Opc = NVPTX::INT_WMMA_MMA_col_row_f16_f32_satfinite;
4172 break;
4173 case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f16:
4174 Opc = NVPTX::INT_WMMA_MMA_col_row_f32_f16;
4175 break;
4176 case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f16_satfinite:
4177 Opc = NVPTX::INT_WMMA_MMA_col_row_f32_f16_satfinite;
4178 break;
4179 case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f32:
4180 Opc = NVPTX::INT_WMMA_MMA_col_row_f32_f32;
4181 break;
4182 case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f32_satfinite:
4183 Opc = NVPTX::INT_WMMA_MMA_col_row_f32_f32_satfinite;
4184 break;
4185 case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f16:
4186 Opc = NVPTX::INT_WMMA_MMA_row_col_f16_f16;
4187 break;
4188 case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f16_satfinite:
4189 Opc = NVPTX::INT_WMMA_MMA_row_col_f16_f16_satfinite;
4190 break;
4191 case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f32:
4192 Opc = NVPTX::INT_WMMA_MMA_row_col_f16_f32;
4193 break;
4194 case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f32_satfinite:
4195 Opc = NVPTX::INT_WMMA_MMA_row_col_f16_f32_satfinite;
4196 break;
4197 case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f16:
4198 Opc = NVPTX::INT_WMMA_MMA_row_col_f32_f16;
4199 break;
4200 case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f16_satfinite:
4201 Opc = NVPTX::INT_WMMA_MMA_row_col_f32_f16_satfinite;
4202 break;
4203 case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f32:
4204 Opc = NVPTX::INT_WMMA_MMA_row_col_f32_f32;
4205 break;
4206 case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f32_satfinite:
4207 Opc = NVPTX::INT_WMMA_MMA_row_col_f32_f32_satfinite;
4208 break;
4209 case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f16:
4210 Opc = NVPTX::INT_WMMA_MMA_row_row_f16_f16;
4211 break;
4212 case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f16_satfinite:
4213 Opc = NVPTX::INT_WMMA_MMA_row_row_f16_f16_satfinite;
4214 break;
4215 case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f32:
4216 Opc = NVPTX::INT_WMMA_MMA_row_row_f16_f32;
4217 break;
4218 case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f32_satfinite:
4219 Opc = NVPTX::INT_WMMA_MMA_row_row_f16_f32_satfinite;
4220 break;
4221 case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f16:
4222 Opc = NVPTX::INT_WMMA_MMA_row_row_f32_f16;
4223 break;
4224 case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f16_satfinite:
4225 Opc = NVPTX::INT_WMMA_MMA_row_row_f32_f16_satfinite;
4226 break;
4227 case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f32:
4228 Opc = NVPTX::INT_WMMA_MMA_row_row_f32_f32;
4229 break;
4230 case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f32_satfinite:
4231 Opc = NVPTX::INT_WMMA_MMA_row_row_f32_f32_satfinite;
4232 break;
4233 }
4234
4235 SmallVector<SDValue, 24> Ops;
4236 // Pass through operands and return value types to the machine node.
4237 for (unsigned i = 1; i < N->getNumOperands(); ++i)
4238 Ops.push_back(N->getOperand(i));
4239 SmallVector<EVT, 8> InstVTs(N->getNumValues(), N->getValueType(0));
4240 SDNode *MN = CurDAG->getMachineNode(Opc, DL, InstVTs, Ops);
4241 ReplaceNode(N, MN);
4242 return true;
4243}