blob: 7da621ccdc3943799702fb4c260dd5d19a5f73d0 [file] [log] [blame]
//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//
13
Justin Holewinskiae556d32012-05-04 20:18:50 +000014#include "NVPTXISelDAGToDAG.h"
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +000015#include "NVPTXUtilities.h"
Jingyue Wu48a9bdc2015-07-20 21:28:54 +000016#include "llvm/Analysis/ValueTracking.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000017#include "llvm/IR/GlobalValue.h"
18#include "llvm/IR/Instructions.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000019#include "llvm/Support/CommandLine.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000020#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000022#include "llvm/Support/raw_ostream.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000023#include "llvm/Target/TargetIntrinsicInfo.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000024
Justin Holewinskiae556d32012-05-04 20:18:50 +000025using namespace llvm;
26
Chandler Carruth84e68b22014-04-22 02:41:26 +000027#define DEBUG_TYPE "nvptx-isel"
28
Justin Holewinskiae556d32012-05-04 20:18:50 +000029/// createNVPTXISelDag - This pass converts a legalized DAG into a
30/// NVPTX-specific DAG, ready for instruction scheduling.
31FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
32 llvm::CodeGenOpt::Level OptLevel) {
33 return new NVPTXDAGToDAGISel(TM, OptLevel);
34}
35
Justin Holewinskiae556d32012-05-04 20:18:50 +000036NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
37 CodeGenOpt::Level OptLevel)
Eric Christopher02389e32015-02-19 00:08:27 +000038 : SelectionDAGISel(tm, OptLevel), TM(tm) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000039 doMulWide = (OptLevel > 0);
Justin Holewinskicd069e62013-07-22 12:18:04 +000040}
Justin Holewinskiae556d32012-05-04 20:18:50 +000041
Eric Christopher147bba22015-01-30 01:40:59 +000042bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
Justin Lebar077f8fb2017-01-21 01:00:14 +000043 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
44 return SelectionDAGISel::runOnMachineFunction(MF);
Eric Christopher147bba22015-01-30 01:40:59 +000045}
46
Justin Holewinskicd069e62013-07-22 12:18:04 +000047int NVPTXDAGToDAGISel::getDivF32Level() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000048 return Subtarget->getTargetLowering()->getDivF32Level();
Justin Holewinskicd069e62013-07-22 12:18:04 +000049}
Justin Holewinskiae556d32012-05-04 20:18:50 +000050
Justin Holewinskicd069e62013-07-22 12:18:04 +000051bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000052 return Subtarget->getTargetLowering()->usePrecSqrtF32();
Justin Holewinskicd069e62013-07-22 12:18:04 +000053}
54
55bool NVPTXDAGToDAGISel::useF32FTZ() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000056 return Subtarget->getTargetLowering()->useF32FTZ(*MF);
Justin Holewinskiae556d32012-05-04 20:18:50 +000057}
58
Justin Holewinski428cf0e2014-07-17 18:10:09 +000059bool NVPTXDAGToDAGISel::allowFMA() const {
Eric Christopher147bba22015-01-30 01:40:59 +000060 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
Justin Holewinski428cf0e2014-07-17 18:10:09 +000061 return TL->allowFMA(*MF, OptLevel);
62}
63
Artem Belevichd109f462017-01-13 18:48:13 +000064bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
65 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
66 return TL->allowUnsafeFPMath(*MF);
67}
68
Justin Holewinskiae556d32012-05-04 20:18:50 +000069/// Select - Select instructions not customized! Used for
70/// expanded, promoted and normal instructions.
Justin Bogner8d83fb62016-05-13 21:12:53 +000071void NVPTXDAGToDAGISel::Select(SDNode *N) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000072
Tim Northover31d093c2013-09-22 08:21:56 +000073 if (N->isMachineOpcode()) {
74 N->setNodeId(-1);
Justin Bogner8d83fb62016-05-13 21:12:53 +000075 return; // Already selected.
Tim Northover31d093c2013-09-22 08:21:56 +000076 }
Justin Holewinskiae556d32012-05-04 20:18:50 +000077
Justin Holewinskiae556d32012-05-04 20:18:50 +000078 switch (N->getOpcode()) {
79 case ISD::LOAD:
Justin Bogner8d83fb62016-05-13 21:12:53 +000080 if (tryLoad(N))
81 return;
Justin Holewinskiae556d32012-05-04 20:18:50 +000082 break;
83 case ISD::STORE:
Justin Bogner8d83fb62016-05-13 21:12:53 +000084 if (tryStore(N))
85 return;
Justin Holewinskiae556d32012-05-04 20:18:50 +000086 break;
Artem Belevich620db1f2017-02-23 22:38:24 +000087 case ISD::EXTRACT_VECTOR_ELT:
88 if (tryEXTRACT_VECTOR_ELEMENT(N))
89 return;
90 break;
91 case NVPTXISD::SETP_F16X2:
92 SelectSETP_F16X2(N);
93 return;
94
Justin Holewinskibe8dc642013-02-12 14:18:49 +000095 case NVPTXISD::LoadV2:
96 case NVPTXISD::LoadV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +000097 if (tryLoadVector(N))
98 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +000099 break;
100 case NVPTXISD::LDGV2:
101 case NVPTXISD::LDGV4:
102 case NVPTXISD::LDUV2:
103 case NVPTXISD::LDUV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000104 if (tryLDGLDU(N))
105 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000106 break;
107 case NVPTXISD::StoreV2:
108 case NVPTXISD::StoreV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000109 if (tryStoreVector(N))
110 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000111 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000112 case NVPTXISD::LoadParam:
113 case NVPTXISD::LoadParamV2:
114 case NVPTXISD::LoadParamV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000115 if (tryLoadParam(N))
116 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000117 break;
118 case NVPTXISD::StoreRetval:
119 case NVPTXISD::StoreRetvalV2:
120 case NVPTXISD::StoreRetvalV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000121 if (tryStoreRetval(N))
122 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000123 break;
124 case NVPTXISD::StoreParam:
125 case NVPTXISD::StoreParamV2:
126 case NVPTXISD::StoreParamV4:
127 case NVPTXISD::StoreParamS32:
128 case NVPTXISD::StoreParamU32:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000129 if (tryStoreParam(N))
130 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000131 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000132 case ISD::INTRINSIC_WO_CHAIN:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000133 if (tryIntrinsicNoChain(N))
134 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000135 break;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000136 case ISD::INTRINSIC_W_CHAIN:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000137 if (tryIntrinsicChain(N))
138 return;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000139 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000140 case NVPTXISD::Tex1DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000141 case NVPTXISD::Tex1DFloatFloat:
142 case NVPTXISD::Tex1DFloatFloatLevel:
143 case NVPTXISD::Tex1DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000144 case NVPTXISD::Tex1DS32S32:
145 case NVPTXISD::Tex1DS32Float:
146 case NVPTXISD::Tex1DS32FloatLevel:
147 case NVPTXISD::Tex1DS32FloatGrad:
148 case NVPTXISD::Tex1DU32S32:
149 case NVPTXISD::Tex1DU32Float:
150 case NVPTXISD::Tex1DU32FloatLevel:
151 case NVPTXISD::Tex1DU32FloatGrad:
152 case NVPTXISD::Tex1DArrayFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000153 case NVPTXISD::Tex1DArrayFloatFloat:
154 case NVPTXISD::Tex1DArrayFloatFloatLevel:
155 case NVPTXISD::Tex1DArrayFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000156 case NVPTXISD::Tex1DArrayS32S32:
157 case NVPTXISD::Tex1DArrayS32Float:
158 case NVPTXISD::Tex1DArrayS32FloatLevel:
159 case NVPTXISD::Tex1DArrayS32FloatGrad:
160 case NVPTXISD::Tex1DArrayU32S32:
161 case NVPTXISD::Tex1DArrayU32Float:
162 case NVPTXISD::Tex1DArrayU32FloatLevel:
163 case NVPTXISD::Tex1DArrayU32FloatGrad:
164 case NVPTXISD::Tex2DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000165 case NVPTXISD::Tex2DFloatFloat:
166 case NVPTXISD::Tex2DFloatFloatLevel:
167 case NVPTXISD::Tex2DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000168 case NVPTXISD::Tex2DS32S32:
169 case NVPTXISD::Tex2DS32Float:
170 case NVPTXISD::Tex2DS32FloatLevel:
171 case NVPTXISD::Tex2DS32FloatGrad:
172 case NVPTXISD::Tex2DU32S32:
173 case NVPTXISD::Tex2DU32Float:
174 case NVPTXISD::Tex2DU32FloatLevel:
175 case NVPTXISD::Tex2DU32FloatGrad:
176 case NVPTXISD::Tex2DArrayFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000177 case NVPTXISD::Tex2DArrayFloatFloat:
178 case NVPTXISD::Tex2DArrayFloatFloatLevel:
179 case NVPTXISD::Tex2DArrayFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000180 case NVPTXISD::Tex2DArrayS32S32:
181 case NVPTXISD::Tex2DArrayS32Float:
182 case NVPTXISD::Tex2DArrayS32FloatLevel:
183 case NVPTXISD::Tex2DArrayS32FloatGrad:
184 case NVPTXISD::Tex2DArrayU32S32:
185 case NVPTXISD::Tex2DArrayU32Float:
186 case NVPTXISD::Tex2DArrayU32FloatLevel:
187 case NVPTXISD::Tex2DArrayU32FloatGrad:
188 case NVPTXISD::Tex3DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000189 case NVPTXISD::Tex3DFloatFloat:
190 case NVPTXISD::Tex3DFloatFloatLevel:
191 case NVPTXISD::Tex3DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000192 case NVPTXISD::Tex3DS32S32:
193 case NVPTXISD::Tex3DS32Float:
194 case NVPTXISD::Tex3DS32FloatLevel:
195 case NVPTXISD::Tex3DS32FloatGrad:
196 case NVPTXISD::Tex3DU32S32:
197 case NVPTXISD::Tex3DU32Float:
198 case NVPTXISD::Tex3DU32FloatLevel:
199 case NVPTXISD::Tex3DU32FloatGrad:
200 case NVPTXISD::TexCubeFloatFloat:
201 case NVPTXISD::TexCubeFloatFloatLevel:
202 case NVPTXISD::TexCubeS32Float:
203 case NVPTXISD::TexCubeS32FloatLevel:
204 case NVPTXISD::TexCubeU32Float:
205 case NVPTXISD::TexCubeU32FloatLevel:
206 case NVPTXISD::TexCubeArrayFloatFloat:
207 case NVPTXISD::TexCubeArrayFloatFloatLevel:
208 case NVPTXISD::TexCubeArrayS32Float:
209 case NVPTXISD::TexCubeArrayS32FloatLevel:
210 case NVPTXISD::TexCubeArrayU32Float:
211 case NVPTXISD::TexCubeArrayU32FloatLevel:
212 case NVPTXISD::Tld4R2DFloatFloat:
213 case NVPTXISD::Tld4G2DFloatFloat:
214 case NVPTXISD::Tld4B2DFloatFloat:
215 case NVPTXISD::Tld4A2DFloatFloat:
216 case NVPTXISD::Tld4R2DS64Float:
217 case NVPTXISD::Tld4G2DS64Float:
218 case NVPTXISD::Tld4B2DS64Float:
219 case NVPTXISD::Tld4A2DS64Float:
220 case NVPTXISD::Tld4R2DU64Float:
221 case NVPTXISD::Tld4G2DU64Float:
222 case NVPTXISD::Tld4B2DU64Float:
223 case NVPTXISD::Tld4A2DU64Float:
224 case NVPTXISD::TexUnified1DFloatS32:
225 case NVPTXISD::TexUnified1DFloatFloat:
226 case NVPTXISD::TexUnified1DFloatFloatLevel:
227 case NVPTXISD::TexUnified1DFloatFloatGrad:
228 case NVPTXISD::TexUnified1DS32S32:
229 case NVPTXISD::TexUnified1DS32Float:
230 case NVPTXISD::TexUnified1DS32FloatLevel:
231 case NVPTXISD::TexUnified1DS32FloatGrad:
232 case NVPTXISD::TexUnified1DU32S32:
233 case NVPTXISD::TexUnified1DU32Float:
234 case NVPTXISD::TexUnified1DU32FloatLevel:
235 case NVPTXISD::TexUnified1DU32FloatGrad:
236 case NVPTXISD::TexUnified1DArrayFloatS32:
237 case NVPTXISD::TexUnified1DArrayFloatFloat:
238 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
239 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
240 case NVPTXISD::TexUnified1DArrayS32S32:
241 case NVPTXISD::TexUnified1DArrayS32Float:
242 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
243 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
244 case NVPTXISD::TexUnified1DArrayU32S32:
245 case NVPTXISD::TexUnified1DArrayU32Float:
246 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
247 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
248 case NVPTXISD::TexUnified2DFloatS32:
249 case NVPTXISD::TexUnified2DFloatFloat:
250 case NVPTXISD::TexUnified2DFloatFloatLevel:
251 case NVPTXISD::TexUnified2DFloatFloatGrad:
252 case NVPTXISD::TexUnified2DS32S32:
253 case NVPTXISD::TexUnified2DS32Float:
254 case NVPTXISD::TexUnified2DS32FloatLevel:
255 case NVPTXISD::TexUnified2DS32FloatGrad:
256 case NVPTXISD::TexUnified2DU32S32:
257 case NVPTXISD::TexUnified2DU32Float:
258 case NVPTXISD::TexUnified2DU32FloatLevel:
259 case NVPTXISD::TexUnified2DU32FloatGrad:
260 case NVPTXISD::TexUnified2DArrayFloatS32:
261 case NVPTXISD::TexUnified2DArrayFloatFloat:
262 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
263 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
264 case NVPTXISD::TexUnified2DArrayS32S32:
265 case NVPTXISD::TexUnified2DArrayS32Float:
266 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
267 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
268 case NVPTXISD::TexUnified2DArrayU32S32:
269 case NVPTXISD::TexUnified2DArrayU32Float:
270 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
271 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
272 case NVPTXISD::TexUnified3DFloatS32:
273 case NVPTXISD::TexUnified3DFloatFloat:
274 case NVPTXISD::TexUnified3DFloatFloatLevel:
275 case NVPTXISD::TexUnified3DFloatFloatGrad:
276 case NVPTXISD::TexUnified3DS32S32:
277 case NVPTXISD::TexUnified3DS32Float:
278 case NVPTXISD::TexUnified3DS32FloatLevel:
279 case NVPTXISD::TexUnified3DS32FloatGrad:
280 case NVPTXISD::TexUnified3DU32S32:
281 case NVPTXISD::TexUnified3DU32Float:
282 case NVPTXISD::TexUnified3DU32FloatLevel:
283 case NVPTXISD::TexUnified3DU32FloatGrad:
284 case NVPTXISD::TexUnifiedCubeFloatFloat:
285 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
286 case NVPTXISD::TexUnifiedCubeS32Float:
287 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
288 case NVPTXISD::TexUnifiedCubeU32Float:
289 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
290 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
291 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
292 case NVPTXISD::TexUnifiedCubeArrayS32Float:
293 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
294 case NVPTXISD::TexUnifiedCubeArrayU32Float:
295 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
296 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
297 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
298 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
299 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
300 case NVPTXISD::Tld4UnifiedR2DS64Float:
301 case NVPTXISD::Tld4UnifiedG2DS64Float:
302 case NVPTXISD::Tld4UnifiedB2DS64Float:
303 case NVPTXISD::Tld4UnifiedA2DS64Float:
304 case NVPTXISD::Tld4UnifiedR2DU64Float:
305 case NVPTXISD::Tld4UnifiedG2DU64Float:
306 case NVPTXISD::Tld4UnifiedB2DU64Float:
307 case NVPTXISD::Tld4UnifiedA2DU64Float:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000308 if (tryTextureIntrinsic(N))
309 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000310 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000311 case NVPTXISD::Suld1DI8Clamp:
312 case NVPTXISD::Suld1DI16Clamp:
313 case NVPTXISD::Suld1DI32Clamp:
314 case NVPTXISD::Suld1DI64Clamp:
315 case NVPTXISD::Suld1DV2I8Clamp:
316 case NVPTXISD::Suld1DV2I16Clamp:
317 case NVPTXISD::Suld1DV2I32Clamp:
318 case NVPTXISD::Suld1DV2I64Clamp:
319 case NVPTXISD::Suld1DV4I8Clamp:
320 case NVPTXISD::Suld1DV4I16Clamp:
321 case NVPTXISD::Suld1DV4I32Clamp:
322 case NVPTXISD::Suld1DArrayI8Clamp:
323 case NVPTXISD::Suld1DArrayI16Clamp:
324 case NVPTXISD::Suld1DArrayI32Clamp:
325 case NVPTXISD::Suld1DArrayI64Clamp:
326 case NVPTXISD::Suld1DArrayV2I8Clamp:
327 case NVPTXISD::Suld1DArrayV2I16Clamp:
328 case NVPTXISD::Suld1DArrayV2I32Clamp:
329 case NVPTXISD::Suld1DArrayV2I64Clamp:
330 case NVPTXISD::Suld1DArrayV4I8Clamp:
331 case NVPTXISD::Suld1DArrayV4I16Clamp:
332 case NVPTXISD::Suld1DArrayV4I32Clamp:
333 case NVPTXISD::Suld2DI8Clamp:
334 case NVPTXISD::Suld2DI16Clamp:
335 case NVPTXISD::Suld2DI32Clamp:
336 case NVPTXISD::Suld2DI64Clamp:
337 case NVPTXISD::Suld2DV2I8Clamp:
338 case NVPTXISD::Suld2DV2I16Clamp:
339 case NVPTXISD::Suld2DV2I32Clamp:
340 case NVPTXISD::Suld2DV2I64Clamp:
341 case NVPTXISD::Suld2DV4I8Clamp:
342 case NVPTXISD::Suld2DV4I16Clamp:
343 case NVPTXISD::Suld2DV4I32Clamp:
344 case NVPTXISD::Suld2DArrayI8Clamp:
345 case NVPTXISD::Suld2DArrayI16Clamp:
346 case NVPTXISD::Suld2DArrayI32Clamp:
347 case NVPTXISD::Suld2DArrayI64Clamp:
348 case NVPTXISD::Suld2DArrayV2I8Clamp:
349 case NVPTXISD::Suld2DArrayV2I16Clamp:
350 case NVPTXISD::Suld2DArrayV2I32Clamp:
351 case NVPTXISD::Suld2DArrayV2I64Clamp:
352 case NVPTXISD::Suld2DArrayV4I8Clamp:
353 case NVPTXISD::Suld2DArrayV4I16Clamp:
354 case NVPTXISD::Suld2DArrayV4I32Clamp:
355 case NVPTXISD::Suld3DI8Clamp:
356 case NVPTXISD::Suld3DI16Clamp:
357 case NVPTXISD::Suld3DI32Clamp:
358 case NVPTXISD::Suld3DI64Clamp:
359 case NVPTXISD::Suld3DV2I8Clamp:
360 case NVPTXISD::Suld3DV2I16Clamp:
361 case NVPTXISD::Suld3DV2I32Clamp:
362 case NVPTXISD::Suld3DV2I64Clamp:
363 case NVPTXISD::Suld3DV4I8Clamp:
364 case NVPTXISD::Suld3DV4I16Clamp:
365 case NVPTXISD::Suld3DV4I32Clamp:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000366 case NVPTXISD::Suld1DI8Trap:
367 case NVPTXISD::Suld1DI16Trap:
368 case NVPTXISD::Suld1DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000369 case NVPTXISD::Suld1DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000370 case NVPTXISD::Suld1DV2I8Trap:
371 case NVPTXISD::Suld1DV2I16Trap:
372 case NVPTXISD::Suld1DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000373 case NVPTXISD::Suld1DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000374 case NVPTXISD::Suld1DV4I8Trap:
375 case NVPTXISD::Suld1DV4I16Trap:
376 case NVPTXISD::Suld1DV4I32Trap:
377 case NVPTXISD::Suld1DArrayI8Trap:
378 case NVPTXISD::Suld1DArrayI16Trap:
379 case NVPTXISD::Suld1DArrayI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000380 case NVPTXISD::Suld1DArrayI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000381 case NVPTXISD::Suld1DArrayV2I8Trap:
382 case NVPTXISD::Suld1DArrayV2I16Trap:
383 case NVPTXISD::Suld1DArrayV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000384 case NVPTXISD::Suld1DArrayV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000385 case NVPTXISD::Suld1DArrayV4I8Trap:
386 case NVPTXISD::Suld1DArrayV4I16Trap:
387 case NVPTXISD::Suld1DArrayV4I32Trap:
388 case NVPTXISD::Suld2DI8Trap:
389 case NVPTXISD::Suld2DI16Trap:
390 case NVPTXISD::Suld2DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000391 case NVPTXISD::Suld2DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000392 case NVPTXISD::Suld2DV2I8Trap:
393 case NVPTXISD::Suld2DV2I16Trap:
394 case NVPTXISD::Suld2DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000395 case NVPTXISD::Suld2DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000396 case NVPTXISD::Suld2DV4I8Trap:
397 case NVPTXISD::Suld2DV4I16Trap:
398 case NVPTXISD::Suld2DV4I32Trap:
399 case NVPTXISD::Suld2DArrayI8Trap:
400 case NVPTXISD::Suld2DArrayI16Trap:
401 case NVPTXISD::Suld2DArrayI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000402 case NVPTXISD::Suld2DArrayI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000403 case NVPTXISD::Suld2DArrayV2I8Trap:
404 case NVPTXISD::Suld2DArrayV2I16Trap:
405 case NVPTXISD::Suld2DArrayV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000406 case NVPTXISD::Suld2DArrayV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000407 case NVPTXISD::Suld2DArrayV4I8Trap:
408 case NVPTXISD::Suld2DArrayV4I16Trap:
409 case NVPTXISD::Suld2DArrayV4I32Trap:
410 case NVPTXISD::Suld3DI8Trap:
411 case NVPTXISD::Suld3DI16Trap:
412 case NVPTXISD::Suld3DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000413 case NVPTXISD::Suld3DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000414 case NVPTXISD::Suld3DV2I8Trap:
415 case NVPTXISD::Suld3DV2I16Trap:
416 case NVPTXISD::Suld3DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000417 case NVPTXISD::Suld3DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000418 case NVPTXISD::Suld3DV4I8Trap:
419 case NVPTXISD::Suld3DV4I16Trap:
420 case NVPTXISD::Suld3DV4I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000421 case NVPTXISD::Suld1DI8Zero:
422 case NVPTXISD::Suld1DI16Zero:
423 case NVPTXISD::Suld1DI32Zero:
424 case NVPTXISD::Suld1DI64Zero:
425 case NVPTXISD::Suld1DV2I8Zero:
426 case NVPTXISD::Suld1DV2I16Zero:
427 case NVPTXISD::Suld1DV2I32Zero:
428 case NVPTXISD::Suld1DV2I64Zero:
429 case NVPTXISD::Suld1DV4I8Zero:
430 case NVPTXISD::Suld1DV4I16Zero:
431 case NVPTXISD::Suld1DV4I32Zero:
432 case NVPTXISD::Suld1DArrayI8Zero:
433 case NVPTXISD::Suld1DArrayI16Zero:
434 case NVPTXISD::Suld1DArrayI32Zero:
435 case NVPTXISD::Suld1DArrayI64Zero:
436 case NVPTXISD::Suld1DArrayV2I8Zero:
437 case NVPTXISD::Suld1DArrayV2I16Zero:
438 case NVPTXISD::Suld1DArrayV2I32Zero:
439 case NVPTXISD::Suld1DArrayV2I64Zero:
440 case NVPTXISD::Suld1DArrayV4I8Zero:
441 case NVPTXISD::Suld1DArrayV4I16Zero:
442 case NVPTXISD::Suld1DArrayV4I32Zero:
443 case NVPTXISD::Suld2DI8Zero:
444 case NVPTXISD::Suld2DI16Zero:
445 case NVPTXISD::Suld2DI32Zero:
446 case NVPTXISD::Suld2DI64Zero:
447 case NVPTXISD::Suld2DV2I8Zero:
448 case NVPTXISD::Suld2DV2I16Zero:
449 case NVPTXISD::Suld2DV2I32Zero:
450 case NVPTXISD::Suld2DV2I64Zero:
451 case NVPTXISD::Suld2DV4I8Zero:
452 case NVPTXISD::Suld2DV4I16Zero:
453 case NVPTXISD::Suld2DV4I32Zero:
454 case NVPTXISD::Suld2DArrayI8Zero:
455 case NVPTXISD::Suld2DArrayI16Zero:
456 case NVPTXISD::Suld2DArrayI32Zero:
457 case NVPTXISD::Suld2DArrayI64Zero:
458 case NVPTXISD::Suld2DArrayV2I8Zero:
459 case NVPTXISD::Suld2DArrayV2I16Zero:
460 case NVPTXISD::Suld2DArrayV2I32Zero:
461 case NVPTXISD::Suld2DArrayV2I64Zero:
462 case NVPTXISD::Suld2DArrayV4I8Zero:
463 case NVPTXISD::Suld2DArrayV4I16Zero:
464 case NVPTXISD::Suld2DArrayV4I32Zero:
465 case NVPTXISD::Suld3DI8Zero:
466 case NVPTXISD::Suld3DI16Zero:
467 case NVPTXISD::Suld3DI32Zero:
468 case NVPTXISD::Suld3DI64Zero:
469 case NVPTXISD::Suld3DV2I8Zero:
470 case NVPTXISD::Suld3DV2I16Zero:
471 case NVPTXISD::Suld3DV2I32Zero:
472 case NVPTXISD::Suld3DV2I64Zero:
473 case NVPTXISD::Suld3DV4I8Zero:
474 case NVPTXISD::Suld3DV4I16Zero:
475 case NVPTXISD::Suld3DV4I32Zero:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000476 if (trySurfaceIntrinsic(N))
477 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000478 break;
Justin Holewinskica7a4f12014-06-27 18:35:27 +0000479 case ISD::AND:
480 case ISD::SRA:
481 case ISD::SRL:
482 // Try to select BFE
Justin Bogner8d83fb62016-05-13 21:12:53 +0000483 if (tryBFE(N))
484 return;
Justin Holewinskica7a4f12014-06-27 18:35:27 +0000485 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000486 case ISD::ADDRSPACECAST:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000487 SelectAddrSpaceCast(N);
488 return;
Artem Belevich64dc9be2017-01-13 20:56:17 +0000489 case ISD::ConstantFP:
490 if (tryConstantFP16(N))
491 return;
492 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000493 default:
494 break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000495 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000496 SelectCode(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000497}
498
Justin Bogner8d83fb62016-05-13 21:12:53 +0000499bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000500 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
501 switch (IID) {
502 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000503 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000504 case Intrinsic::nvvm_ldg_global_f:
505 case Intrinsic::nvvm_ldg_global_i:
506 case Intrinsic::nvvm_ldg_global_p:
507 case Intrinsic::nvvm_ldu_global_f:
508 case Intrinsic::nvvm_ldu_global_i:
509 case Intrinsic::nvvm_ldu_global_p:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000510 return tryLDGLDU(N);
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000511 }
512}
513
Artem Belevich64dc9be2017-01-13 20:56:17 +0000514// There's no way to specify FP16 immediates in .f16 ops, so we have to
515// load them into an .f16 register first.
516bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
517 if (N->getValueType(0) != MVT::f16)
518 return false;
519 SDValue Val = CurDAG->getTargetConstantFP(
520 cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
521 SDNode *LoadConstF16 =
522 CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
523 ReplaceNode(N, LoadConstF16);
524 return true;
525}
526
Artem Belevich620db1f2017-02-23 22:38:24 +0000527// Map ISD:CONDCODE value to appropriate CmpMode expected by
528// NVPTXInstPrinter::printCmpMode()
529static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
530 using NVPTX::PTXCmpMode::CmpMode;
531 unsigned PTXCmpMode = [](ISD::CondCode CC) {
532 switch (CC) {
533 default:
534 llvm_unreachable("Unexpected condition code.");
535 case ISD::SETOEQ:
536 return CmpMode::EQ;
537 case ISD::SETOGT:
538 return CmpMode::GT;
539 case ISD::SETOGE:
540 return CmpMode::GE;
541 case ISD::SETOLT:
542 return CmpMode::LT;
543 case ISD::SETOLE:
544 return CmpMode::LE;
545 case ISD::SETONE:
546 return CmpMode::NE;
547 case ISD::SETO:
548 return CmpMode::NUM;
549 case ISD::SETUO:
550 return CmpMode::NotANumber;
551 case ISD::SETUEQ:
552 return CmpMode::EQU;
553 case ISD::SETUGT:
554 return CmpMode::GTU;
555 case ISD::SETUGE:
556 return CmpMode::GEU;
557 case ISD::SETULT:
558 return CmpMode::LTU;
559 case ISD::SETULE:
560 return CmpMode::LEU;
561 case ISD::SETUNE:
562 return CmpMode::NEU;
563 case ISD::SETEQ:
564 return CmpMode::EQ;
565 case ISD::SETGT:
566 return CmpMode::GT;
567 case ISD::SETGE:
568 return CmpMode::GE;
569 case ISD::SETLT:
570 return CmpMode::LT;
571 case ISD::SETLE:
572 return CmpMode::LE;
573 case ISD::SETNE:
574 return CmpMode::NE;
575 }
576 }(CondCode.get());
577
578 if (FTZ)
579 PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
580
581 return PTXCmpMode;
582}
583
584bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
585 unsigned PTXCmpMode =
586 getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
587 SDLoc DL(N);
588 SDNode *SetP = CurDAG->getMachineNode(
589 NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
590 N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
591 ReplaceNode(N, SetP);
592 return true;
593}
594
595// Find all instances of extract_vector_elt that use this v2f16 vector
596// and coalesce them into a scattering move instruction.
597bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
598 SDValue Vector = N->getOperand(0);
599
600 // We only care about f16x2 as it's the only real vector type we
601 // need to deal with.
602 if (Vector.getSimpleValueType() != MVT::v2f16)
603 return false;
604
605 // Find and record all uses of this vector that extract element 0 or 1.
606 SmallVector<SDNode *, 4> E0, E1;
607 for (const auto &U : Vector.getNode()->uses()) {
608 if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
609 continue;
610 if (U->getOperand(0) != Vector)
611 continue;
612 if (const ConstantSDNode *IdxConst =
613 dyn_cast<ConstantSDNode>(U->getOperand(1))) {
614 if (IdxConst->getZExtValue() == 0)
615 E0.push_back(U);
616 else if (IdxConst->getZExtValue() == 1)
617 E1.push_back(U);
618 else
619 llvm_unreachable("Invalid vector index.");
620 }
621 }
622
623 // There's no point scattering f16x2 if we only ever access one
624 // element of it.
625 if (E0.empty() || E1.empty())
626 return false;
627
628 unsigned Op = NVPTX::SplitF16x2;
629 // If the vector has been BITCAST'ed from i32, we can use original
630 // value directly and avoid register-to-register move.
631 SDValue Source = Vector;
632 if (Vector->getOpcode() == ISD::BITCAST) {
633 Op = NVPTX::SplitI32toF16x2;
634 Source = Vector->getOperand(0);
635 }
636 // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
637 // into f16,f16 SplitF16x2(V)
638 SDNode *ScatterOp =
639 CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
640 for (auto *Node : E0)
641 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
642 for (auto *Node : E1)
643 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
644
645 return true;
646}
647
Eric Christopher9745b3a2015-01-30 01:41:01 +0000648static unsigned int getCodeAddrSpace(MemSDNode *N) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +0000649 const Value *Src = N->getMemOperand()->getValue();
Justin Holewinskib96d1392013-06-10 13:29:47 +0000650
Justin Holewinskiae556d32012-05-04 20:18:50 +0000651 if (!Src)
Justin Holewinskib96d1392013-06-10 13:29:47 +0000652 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000653
Craig Toppere3dcce92015-08-01 22:20:21 +0000654 if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000655 switch (PT->getAddressSpace()) {
Justin Holewinskib96d1392013-06-10 13:29:47 +0000656 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
657 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
658 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
659 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
660 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
661 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
662 default: break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000663 }
664 }
Justin Holewinskib96d1392013-06-10 13:29:47 +0000665 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000666}
667
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000668static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000669 unsigned CodeAddrSpace, MachineFunction *F) {
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000670 // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
671 // space.
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000672 //
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000673 // We have two ways of identifying invariant loads: Loads may be explicitly
674 // marked as invariant, or we may infer them to be invariant.
675 //
676 // We currently infer invariance only for kernel function pointer params that
677 // are noalias (i.e. __restrict) and never written to.
678 //
679 // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
680 // not during the SelectionDAG phase).
681 //
682 // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
683 // explicitly invariant loads because these are how clang tells us to use ldg
684 // when the user uses a builtin.
685 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000686 return false;
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000687
688 if (N->isInvariant())
689 return true;
690
691 // Load wasn't explicitly invariant. Attempt to infer invariance.
692 if (!isKernelFunction(*F->getFunction()))
693 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000694
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000695 // We use GetUnderlyingObjects() here instead of
696 // GetUnderlyingObject() mainly because the former looks through phi
697 // nodes while the latter does not. We need to look through phi
698 // nodes to handle pointer induction variables.
699 SmallVector<Value *, 8> Objs;
700 GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
701 Objs, F->getDataLayout());
702 for (Value *Obj : Objs) {
703 auto *A = dyn_cast<const Argument>(Obj);
704 if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
705 }
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000706
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000707 return true;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000708}
709
Justin Bogner8d83fb62016-05-13 21:12:53 +0000710bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +0000711 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
712 switch (IID) {
713 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000714 return false;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000715 case Intrinsic::nvvm_texsurf_handle_internal:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000716 SelectTexSurfHandle(N);
717 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000718 }
719}
720
Justin Bogner8d83fb62016-05-13 21:12:53 +0000721void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +0000722 // Op 0 is the intrinsic ID
723 SDValue Wrapper = N->getOperand(1);
724 SDValue GlobalVal = Wrapper.getOperand(0);
Justin Bogner8d83fb62016-05-13 21:12:53 +0000725 ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
726 MVT::i64, GlobalVal));
Justin Holewinski30d56a72014-04-09 15:39:15 +0000727}
728
Justin Bogner8d83fb62016-05-13 21:12:53 +0000729void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000730 SDValue Src = N->getOperand(0);
731 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
732 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
733 unsigned DstAddrSpace = CastN->getDestAddressSpace();
734
735 assert(SrcAddrSpace != DstAddrSpace &&
736 "addrspacecast must be between different address spaces");
737
738 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
739 // Specific to generic
740 unsigned Opc;
741 switch (SrcAddrSpace) {
742 default: report_fatal_error("Bad address space in addrspacecast");
743 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000744 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000745 break;
746 case ADDRESS_SPACE_SHARED:
Eric Christopher02389e32015-02-19 00:08:27 +0000747 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000748 break;
749 case ADDRESS_SPACE_CONST:
Eric Christopher02389e32015-02-19 00:08:27 +0000750 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000751 break;
752 case ADDRESS_SPACE_LOCAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000753 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000754 break;
755 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000756 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
757 Src));
758 return;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000759 } else {
760 // Generic to specific
761 if (SrcAddrSpace != 0)
762 report_fatal_error("Cannot cast between two non-generic address spaces");
763 unsigned Opc;
764 switch (DstAddrSpace) {
765 default: report_fatal_error("Bad address space in addrspacecast");
766 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000767 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
768 : NVPTX::cvta_to_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000769 break;
770 case ADDRESS_SPACE_SHARED:
Eric Christopher02389e32015-02-19 00:08:27 +0000771 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
772 : NVPTX::cvta_to_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000773 break;
774 case ADDRESS_SPACE_CONST:
Eric Christopher02389e32015-02-19 00:08:27 +0000775 Opc =
776 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000777 break;
778 case ADDRESS_SPACE_LOCAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000779 Opc =
780 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000781 break;
Jingyue Wua2f60272015-06-04 21:28:26 +0000782 case ADDRESS_SPACE_PARAM:
783 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
784 : NVPTX::nvvm_ptr_gen_to_param;
785 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000786 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000787 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
788 Src));
789 return;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000790 }
791}
792
Justin Bogner8d83fb62016-05-13 21:12:53 +0000793bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000794 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000795 LoadSDNode *LD = cast<LoadSDNode>(N);
796 EVT LoadedVT = LD->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +0000797 SDNode *NVPTXLD = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000798
799 // do not support pre/post inc/dec
800 if (LD->isIndexed())
Justin Bogner8d83fb62016-05-13 21:12:53 +0000801 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000802
803 if (!LoadedVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +0000804 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000805
806 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +0000807 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000808
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000809 if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
Justin Bogner8d83fb62016-05-13 21:12:53 +0000810 return tryLDGLDU(N);
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000811 }
812
Justin Holewinskiae556d32012-05-04 20:18:50 +0000813 // Volatile Setting
814 // - .volatile is only availalble for .global and .shared
815 bool isVolatile = LD->isVolatile();
816 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
817 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
818 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
819 isVolatile = false;
820
Justin Holewinskiae556d32012-05-04 20:18:50 +0000821 // Type Setting: fromType + fromTypeWidth
822 //
823 // Sign : ISD::SEXTLOAD
824 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
825 // type is integer
826 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
Artem Belevich620db1f2017-02-23 22:38:24 +0000827 MVT SimpleVT = LoadedVT.getSimpleVT();
Justin Holewinskiae556d32012-05-04 20:18:50 +0000828 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +0000829 // Read at least 8 bits (predicates are stored as 8-bit values)
830 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskiae556d32012-05-04 20:18:50 +0000831 unsigned int fromType;
Artem Belevich620db1f2017-02-23 22:38:24 +0000832
833 // Vector Setting
834 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
835 if (SimpleVT.isVector()) {
836 assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
837 // v2f16 is loaded using ld.b32
838 fromTypeWidth = 32;
839 }
840
Justin Holewinskiae556d32012-05-04 20:18:50 +0000841 if ((LD->getExtensionType() == ISD::SEXTLOAD))
842 fromType = NVPTX::PTXLdStInstCode::Signed;
843 else if (ScalarVT.isFloatingPoint())
Artem Belevich64dc9be2017-01-13 20:56:17 +0000844 // f16 uses .b16 as its storage type.
845 fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
846 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000847 else
848 fromType = NVPTX::PTXLdStInstCode::Unsigned;
849
850 // Create the machine instruction DAG
851 SDValue Chain = N->getOperand(0);
852 SDValue N1 = N->getOperand(1);
853 SDValue Addr;
854 SDValue Offset, Base;
855 unsigned Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +0000856 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000857
858 if (SelectDirectAddr(N1, Addr)) {
859 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000860 case MVT::i8:
861 Opcode = NVPTX::LD_i8_avar;
862 break;
863 case MVT::i16:
864 Opcode = NVPTX::LD_i16_avar;
865 break;
866 case MVT::i32:
867 Opcode = NVPTX::LD_i32_avar;
868 break;
869 case MVT::i64:
870 Opcode = NVPTX::LD_i64_avar;
871 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +0000872 case MVT::f16:
873 Opcode = NVPTX::LD_f16_avar;
874 break;
Artem Belevich620db1f2017-02-23 22:38:24 +0000875 case MVT::v2f16:
876 Opcode = NVPTX::LD_f16x2_avar;
877 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000878 case MVT::f32:
879 Opcode = NVPTX::LD_f32_avar;
880 break;
881 case MVT::f64:
882 Opcode = NVPTX::LD_f64_avar;
883 break;
884 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000885 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000886 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000887 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
888 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
889 getI32Imm(fromTypeWidth, dl), Addr, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000890 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +0000891 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
892 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000893 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000894 case MVT::i8:
895 Opcode = NVPTX::LD_i8_asi;
896 break;
897 case MVT::i16:
898 Opcode = NVPTX::LD_i16_asi;
899 break;
900 case MVT::i32:
901 Opcode = NVPTX::LD_i32_asi;
902 break;
903 case MVT::i64:
904 Opcode = NVPTX::LD_i64_asi;
905 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +0000906 case MVT::f16:
907 Opcode = NVPTX::LD_f16_asi;
908 break;
Artem Belevich620db1f2017-02-23 22:38:24 +0000909 case MVT::v2f16:
910 Opcode = NVPTX::LD_f16x2_asi;
911 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000912 case MVT::f32:
913 Opcode = NVPTX::LD_f32_asi;
914 break;
915 case MVT::f64:
916 Opcode = NVPTX::LD_f64_asi;
917 break;
918 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000919 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000920 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000921 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
922 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
923 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000924 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +0000925 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
926 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
927 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000928 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000929 case MVT::i8:
930 Opcode = NVPTX::LD_i8_ari_64;
931 break;
932 case MVT::i16:
933 Opcode = NVPTX::LD_i16_ari_64;
934 break;
935 case MVT::i32:
936 Opcode = NVPTX::LD_i32_ari_64;
937 break;
938 case MVT::i64:
939 Opcode = NVPTX::LD_i64_ari_64;
940 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +0000941 case MVT::f16:
942 Opcode = NVPTX::LD_f16_ari_64;
943 break;
Artem Belevich620db1f2017-02-23 22:38:24 +0000944 case MVT::v2f16:
945 Opcode = NVPTX::LD_f16x2_ari_64;
946 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000947 case MVT::f32:
948 Opcode = NVPTX::LD_f32_ari_64;
949 break;
950 case MVT::f64:
951 Opcode = NVPTX::LD_f64_ari_64;
952 break;
953 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000954 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000955 }
956 } else {
957 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000958 case MVT::i8:
959 Opcode = NVPTX::LD_i8_ari;
960 break;
961 case MVT::i16:
962 Opcode = NVPTX::LD_i16_ari;
963 break;
964 case MVT::i32:
965 Opcode = NVPTX::LD_i32_ari;
966 break;
967 case MVT::i64:
968 Opcode = NVPTX::LD_i64_ari;
969 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +0000970 case MVT::f16:
971 Opcode = NVPTX::LD_f16_ari;
972 break;
Artem Belevich620db1f2017-02-23 22:38:24 +0000973 case MVT::v2f16:
974 Opcode = NVPTX::LD_f16x2_ari;
975 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000976 case MVT::f32:
977 Opcode = NVPTX::LD_f32_ari;
978 break;
979 case MVT::f64:
980 Opcode = NVPTX::LD_f64_ari;
981 break;
982 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000983 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000984 }
Justin Holewinskiae556d32012-05-04 20:18:50 +0000985 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000986 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
987 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
988 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000989 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000990 } else {
Eric Christopher02389e32015-02-19 00:08:27 +0000991 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000992 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000993 case MVT::i8:
994 Opcode = NVPTX::LD_i8_areg_64;
995 break;
996 case MVT::i16:
997 Opcode = NVPTX::LD_i16_areg_64;
998 break;
999 case MVT::i32:
1000 Opcode = NVPTX::LD_i32_areg_64;
1001 break;
1002 case MVT::i64:
1003 Opcode = NVPTX::LD_i64_areg_64;
1004 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +00001005 case MVT::f16:
1006 Opcode = NVPTX::LD_f16_areg_64;
1007 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00001008 case MVT::v2f16:
1009 Opcode = NVPTX::LD_f16x2_areg_64;
1010 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001011 case MVT::f32:
1012 Opcode = NVPTX::LD_f32_areg_64;
1013 break;
1014 case MVT::f64:
1015 Opcode = NVPTX::LD_f64_areg_64;
1016 break;
1017 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001018 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001019 }
1020 } else {
1021 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001022 case MVT::i8:
1023 Opcode = NVPTX::LD_i8_areg;
1024 break;
1025 case MVT::i16:
1026 Opcode = NVPTX::LD_i16_areg;
1027 break;
1028 case MVT::i32:
1029 Opcode = NVPTX::LD_i32_areg;
1030 break;
1031 case MVT::i64:
1032 Opcode = NVPTX::LD_i64_areg;
1033 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +00001034 case MVT::f16:
1035 Opcode = NVPTX::LD_f16_areg;
1036 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00001037 case MVT::v2f16:
1038 Opcode = NVPTX::LD_f16x2_areg;
1039 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001040 case MVT::f32:
1041 Opcode = NVPTX::LD_f32_areg;
1042 break;
1043 case MVT::f64:
1044 Opcode = NVPTX::LD_f64_areg;
1045 break;
1046 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001047 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001048 }
Justin Holewinskiae556d32012-05-04 20:18:50 +00001049 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001050 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
1051 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
1052 getI32Imm(fromTypeWidth, dl), N1, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001053 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001054 }
1055
Justin Bogner8d83fb62016-05-13 21:12:53 +00001056 if (!NVPTXLD)
1057 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001058
Justin Bogner8d83fb62016-05-13 21:12:53 +00001059 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1060 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1061 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1062
1063 ReplaceNode(N, NVPTXLD);
1064 return true;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001065}
1066
Justin Bogner8d83fb62016-05-13 21:12:53 +00001067bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001068
1069 SDValue Chain = N->getOperand(0);
1070 SDValue Op1 = N->getOperand(1);
1071 SDValue Addr, Offset, Base;
1072 unsigned Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001073 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001074 SDNode *LD;
1075 MemSDNode *MemSD = cast<MemSDNode>(N);
1076 EVT LoadedVT = MemSD->getMemoryVT();
1077
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001078 if (!LoadedVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +00001079 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001080
1081 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00001082 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001083
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +00001084 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00001085 return tryLDGLDU(N);
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001086 }
1087
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001088 // Volatile Setting
1089 // - .volatile is only availalble for .global and .shared
1090 bool IsVolatile = MemSD->isVolatile();
1091 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1092 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1093 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1094 IsVolatile = false;
1095
1096 // Vector Setting
1097 MVT SimpleVT = LoadedVT.getSimpleVT();
1098
1099 // Type Setting: fromType + fromTypeWidth
1100 //
1101 // Sign : ISD::SEXTLOAD
1102 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
1103 // type is integer
1104 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1105 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +00001106 // Read at least 8 bits (predicates are stored as 8-bit values)
1107 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001108 unsigned int FromType;
1109 // The last operand holds the original LoadSDNode::getExtensionType() value
Justin Holewinski0497ab12013-03-30 14:29:21 +00001110 unsigned ExtensionType = cast<ConstantSDNode>(
1111 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001112 if (ExtensionType == ISD::SEXTLOAD)
1113 FromType = NVPTX::PTXLdStInstCode::Signed;
1114 else if (ScalarVT.isFloatingPoint())
Artem Belevich620db1f2017-02-23 22:38:24 +00001115 FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1116 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001117 else
1118 FromType = NVPTX::PTXLdStInstCode::Unsigned;
1119
1120 unsigned VecType;
1121
1122 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001123 case NVPTXISD::LoadV2:
1124 VecType = NVPTX::PTXLdStInstCode::V2;
1125 break;
1126 case NVPTXISD::LoadV4:
1127 VecType = NVPTX::PTXLdStInstCode::V4;
1128 break;
1129 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001130 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001131 }
1132
1133 EVT EltVT = N->getValueType(0);
1134
Artem Belevich620db1f2017-02-23 22:38:24 +00001135 // v8f16 is a special case. PTX doesn't have ld.v8.f16
1136 // instruction. Instead, we split the vector into v2f16 chunks and
1137 // load them with ld.v4.b32.
1138 if (EltVT == MVT::v2f16) {
1139 assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
1140 EltVT = MVT::i32;
1141 FromType = NVPTX::PTXLdStInstCode::Untyped;
1142 FromTypeWidth = 32;
1143 }
1144
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001145 if (SelectDirectAddr(Op1, Addr)) {
1146 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001147 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001148 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001149 case NVPTXISD::LoadV2:
1150 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001151 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001152 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001153 case MVT::i8:
1154 Opcode = NVPTX::LDV_i8_v2_avar;
1155 break;
1156 case MVT::i16:
1157 Opcode = NVPTX::LDV_i16_v2_avar;
1158 break;
1159 case MVT::i32:
1160 Opcode = NVPTX::LDV_i32_v2_avar;
1161 break;
1162 case MVT::i64:
1163 Opcode = NVPTX::LDV_i64_v2_avar;
1164 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00001165 case MVT::f16:
1166 Opcode = NVPTX::LDV_f16_v2_avar;
1167 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001168 case MVT::f32:
1169 Opcode = NVPTX::LDV_f32_v2_avar;
1170 break;
1171 case MVT::f64:
1172 Opcode = NVPTX::LDV_f64_v2_avar;
1173 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001174 }
1175 break;
1176 case NVPTXISD::LoadV4:
1177 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001178 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001179 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001180 case MVT::i8:
1181 Opcode = NVPTX::LDV_i8_v4_avar;
1182 break;
1183 case MVT::i16:
1184 Opcode = NVPTX::LDV_i16_v4_avar;
1185 break;
1186 case MVT::i32:
1187 Opcode = NVPTX::LDV_i32_v4_avar;
1188 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00001189 case MVT::f16:
1190 Opcode = NVPTX::LDV_f16_v4_avar;
1191 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001192 case MVT::f32:
1193 Opcode = NVPTX::LDV_f32_v4_avar;
1194 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001195 }
1196 break;
1197 }
1198
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001199 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1200 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1201 getI32Imm(FromTypeWidth, DL), Addr, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001202 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001203 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1204 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001205 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001206 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001207 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001208 case NVPTXISD::LoadV2:
1209 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001210 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001211 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001212 case MVT::i8:
1213 Opcode = NVPTX::LDV_i8_v2_asi;
1214 break;
1215 case MVT::i16:
1216 Opcode = NVPTX::LDV_i16_v2_asi;
1217 break;
1218 case MVT::i32:
1219 Opcode = NVPTX::LDV_i32_v2_asi;
1220 break;
1221 case MVT::i64:
1222 Opcode = NVPTX::LDV_i64_v2_asi;
1223 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00001224 case MVT::f16:
1225 Opcode = NVPTX::LDV_f16_v2_asi;
1226 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001227 case MVT::f32:
1228 Opcode = NVPTX::LDV_f32_v2_asi;
1229 break;
1230 case MVT::f64:
1231 Opcode = NVPTX::LDV_f64_v2_asi;
1232 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001233 }
1234 break;
1235 case NVPTXISD::LoadV4:
1236 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001237 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001238 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001239 case MVT::i8:
1240 Opcode = NVPTX::LDV_i8_v4_asi;
1241 break;
1242 case MVT::i16:
1243 Opcode = NVPTX::LDV_i16_v4_asi;
1244 break;
1245 case MVT::i32:
1246 Opcode = NVPTX::LDV_i32_v4_asi;
1247 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00001248 case MVT::f16:
1249 Opcode = NVPTX::LDV_f16_v4_asi;
1250 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001251 case MVT::f32:
1252 Opcode = NVPTX::LDV_f32_v4_asi;
1253 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001254 }
1255 break;
1256 }
1257
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001258 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1259 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1260 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001261 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001262 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1263 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1264 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001265 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001266 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001267 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001268 case NVPTXISD::LoadV2:
1269 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001270 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001271 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001272 case MVT::i8:
1273 Opcode = NVPTX::LDV_i8_v2_ari_64;
1274 break;
1275 case MVT::i16:
1276 Opcode = NVPTX::LDV_i16_v2_ari_64;
1277 break;
1278 case MVT::i32:
1279 Opcode = NVPTX::LDV_i32_v2_ari_64;
1280 break;
1281 case MVT::i64:
1282 Opcode = NVPTX::LDV_i64_v2_ari_64;
1283 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00001284 case MVT::f16:
1285 Opcode = NVPTX::LDV_f16_v2_ari_64;
1286 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001287 case MVT::f32:
1288 Opcode = NVPTX::LDV_f32_v2_ari_64;
1289 break;
1290 case MVT::f64:
1291 Opcode = NVPTX::LDV_f64_v2_ari_64;
1292 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001293 }
1294 break;
1295 case NVPTXISD::LoadV4:
1296 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001297 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001298 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001299 case MVT::i8:
1300 Opcode = NVPTX::LDV_i8_v4_ari_64;
1301 break;
1302 case MVT::i16:
1303 Opcode = NVPTX::LDV_i16_v4_ari_64;
1304 break;
1305 case MVT::i32:
1306 Opcode = NVPTX::LDV_i32_v4_ari_64;
1307 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00001308 case MVT::f16:
1309 Opcode = NVPTX::LDV_f16_v4_ari_64;
1310 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001311 case MVT::f32:
1312 Opcode = NVPTX::LDV_f32_v4_ari_64;
1313 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001314 }
1315 break;
1316 }
1317 } else {
1318 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001319 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001320 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001321 case NVPTXISD::LoadV2:
1322 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001323 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001324 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001325 case MVT::i8:
1326 Opcode = NVPTX::LDV_i8_v2_ari;
1327 break;
1328 case MVT::i16:
1329 Opcode = NVPTX::LDV_i16_v2_ari;
1330 break;
1331 case MVT::i32:
1332 Opcode = NVPTX::LDV_i32_v2_ari;
1333 break;
1334 case MVT::i64:
1335 Opcode = NVPTX::LDV_i64_v2_ari;
1336 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00001337 case MVT::f16:
1338 Opcode = NVPTX::LDV_f16_v2_ari;
1339 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001340 case MVT::f32:
1341 Opcode = NVPTX::LDV_f32_v2_ari;
1342 break;
1343 case MVT::f64:
1344 Opcode = NVPTX::LDV_f64_v2_ari;
1345 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001346 }
1347 break;
1348 case NVPTXISD::LoadV4:
1349 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001350 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001351 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001352 case MVT::i8:
1353 Opcode = NVPTX::LDV_i8_v4_ari;
1354 break;
1355 case MVT::i16:
1356 Opcode = NVPTX::LDV_i16_v4_ari;
1357 break;
1358 case MVT::i32:
1359 Opcode = NVPTX::LDV_i32_v4_ari;
1360 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00001361 case MVT::f16:
1362 Opcode = NVPTX::LDV_f16_v4_ari;
1363 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001364 case MVT::f32:
1365 Opcode = NVPTX::LDV_f32_v4_ari;
1366 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001367 }
1368 break;
1369 }
1370 }
1371
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001372 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1373 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1374 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001375
Michael Liaob53d8962013-04-19 22:22:57 +00001376 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001377 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001378 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001379 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001380 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001381 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001382 case NVPTXISD::LoadV2:
1383 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001384 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001385 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001386 case MVT::i8:
1387 Opcode = NVPTX::LDV_i8_v2_areg_64;
1388 break;
1389 case MVT::i16:
1390 Opcode = NVPTX::LDV_i16_v2_areg_64;
1391 break;
1392 case MVT::i32:
1393 Opcode = NVPTX::LDV_i32_v2_areg_64;
1394 break;
1395 case MVT::i64:
1396 Opcode = NVPTX::LDV_i64_v2_areg_64;
1397 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00001398 case MVT::f16:
1399 Opcode = NVPTX::LDV_f16_v2_areg_64;
1400 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001401 case MVT::f32:
1402 Opcode = NVPTX::LDV_f32_v2_areg_64;
1403 break;
1404 case MVT::f64:
1405 Opcode = NVPTX::LDV_f64_v2_areg_64;
1406 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001407 }
1408 break;
1409 case NVPTXISD::LoadV4:
1410 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001411 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001412 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001413 case MVT::i8:
1414 Opcode = NVPTX::LDV_i8_v4_areg_64;
1415 break;
1416 case MVT::i16:
1417 Opcode = NVPTX::LDV_i16_v4_areg_64;
1418 break;
1419 case MVT::i32:
1420 Opcode = NVPTX::LDV_i32_v4_areg_64;
1421 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00001422 case MVT::f16:
1423 Opcode = NVPTX::LDV_f16_v4_areg_64;
1424 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001425 case MVT::f32:
1426 Opcode = NVPTX::LDV_f32_v4_areg_64;
1427 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001428 }
1429 break;
1430 }
1431 } else {
1432 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001433 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001434 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001435 case NVPTXISD::LoadV2:
1436 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001437 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001438 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001439 case MVT::i8:
1440 Opcode = NVPTX::LDV_i8_v2_areg;
1441 break;
1442 case MVT::i16:
1443 Opcode = NVPTX::LDV_i16_v2_areg;
1444 break;
1445 case MVT::i32:
1446 Opcode = NVPTX::LDV_i32_v2_areg;
1447 break;
1448 case MVT::i64:
1449 Opcode = NVPTX::LDV_i64_v2_areg;
1450 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00001451 case MVT::f16:
1452 Opcode = NVPTX::LDV_f16_v2_areg;
1453 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001454 case MVT::f32:
1455 Opcode = NVPTX::LDV_f32_v2_areg;
1456 break;
1457 case MVT::f64:
1458 Opcode = NVPTX::LDV_f64_v2_areg;
1459 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001460 }
1461 break;
1462 case NVPTXISD::LoadV4:
1463 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001464 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001465 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001466 case MVT::i8:
1467 Opcode = NVPTX::LDV_i8_v4_areg;
1468 break;
1469 case MVT::i16:
1470 Opcode = NVPTX::LDV_i16_v4_areg;
1471 break;
1472 case MVT::i32:
1473 Opcode = NVPTX::LDV_i32_v4_areg;
1474 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00001475 case MVT::f16:
1476 Opcode = NVPTX::LDV_f16_v4_areg;
1477 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001478 case MVT::f32:
1479 Opcode = NVPTX::LDV_f32_v4_areg;
1480 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001481 }
1482 break;
1483 }
1484 }
1485
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001486 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1487 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1488 getI32Imm(FromTypeWidth, DL), Op1, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001489 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001490 }
1491
1492 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1493 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1494 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1495
Justin Bogner8d83fb62016-05-13 21:12:53 +00001496 ReplaceNode(N, LD);
1497 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001498}
1499
Justin Bogner8d83fb62016-05-13 21:12:53 +00001500bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001501
1502 SDValue Chain = N->getOperand(0);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001503 SDValue Op1;
1504 MemSDNode *Mem;
1505 bool IsLDG = true;
1506
Justin Holewinskic7997922016-04-05 12:38:01 +00001507 // If this is an LDG intrinsic, the address is the third operand. If its an
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001508 // LDG/LDU SD node (from custom vector handling), then its the second operand
1509 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1510 Op1 = N->getOperand(2);
1511 Mem = cast<MemIntrinsicSDNode>(N);
1512 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1513 switch (IID) {
1514 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001515 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001516 case Intrinsic::nvvm_ldg_global_f:
1517 case Intrinsic::nvvm_ldg_global_i:
1518 case Intrinsic::nvvm_ldg_global_p:
1519 IsLDG = true;
1520 break;
1521 case Intrinsic::nvvm_ldu_global_f:
1522 case Intrinsic::nvvm_ldu_global_i:
1523 case Intrinsic::nvvm_ldu_global_p:
1524 IsLDG = false;
1525 break;
1526 }
1527 } else {
1528 Op1 = N->getOperand(1);
1529 Mem = cast<MemSDNode>(N);
1530 }
1531
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001532 unsigned Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001533 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001534 SDNode *LD;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001535 SDValue Base, Offset, Addr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00001536
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001537 EVT EltVT = Mem->getMemoryVT();
Justin Holewinskic7997922016-04-05 12:38:01 +00001538 unsigned NumElts = 1;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001539 if (EltVT.isVector()) {
Justin Holewinskic7997922016-04-05 12:38:01 +00001540 NumElts = EltVT.getVectorNumElements();
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001541 EltVT = EltVT.getVectorElementType();
1542 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001543
Justin Holewinskic7997922016-04-05 12:38:01 +00001544 // Build the "promoted" result VTList for the load. If we are really loading
1545 // i8s, then the return type will be promoted to i16 since we do not expose
1546 // 8-bit registers in NVPTX.
1547 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1548 SmallVector<EVT, 5> InstVTs;
1549 for (unsigned i = 0; i != NumElts; ++i) {
1550 InstVTs.push_back(NodeVT);
1551 }
1552 InstVTs.push_back(MVT::Other);
1553 SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1554
Justin Holewinskie40e9292013-07-01 12:58:52 +00001555 if (SelectDirectAddr(Op1, Addr)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001556 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001557 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001558 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001559 case ISD::INTRINSIC_W_CHAIN:
1560 if (IsLDG) {
1561 switch (EltVT.getSimpleVT().SimpleTy) {
1562 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001563 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001564 case MVT::i8:
1565 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1566 break;
1567 case MVT::i16:
1568 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1569 break;
1570 case MVT::i32:
1571 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1572 break;
1573 case MVT::i64:
1574 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1575 break;
1576 case MVT::f32:
1577 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1578 break;
1579 case MVT::f64:
1580 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1581 break;
1582 }
1583 } else {
1584 switch (EltVT.getSimpleVT().SimpleTy) {
1585 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001586 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001587 case MVT::i8:
1588 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1589 break;
1590 case MVT::i16:
1591 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1592 break;
1593 case MVT::i32:
1594 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1595 break;
1596 case MVT::i64:
1597 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1598 break;
1599 case MVT::f32:
1600 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1601 break;
1602 case MVT::f64:
1603 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1604 break;
1605 }
1606 }
1607 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001608 case NVPTXISD::LDGV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001609 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001610 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001611 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001612 case MVT::i8:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001613 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001614 break;
1615 case MVT::i16:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001616 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001617 break;
1618 case MVT::i32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001619 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001620 break;
1621 case MVT::i64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001622 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001623 break;
1624 case MVT::f32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001625 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001626 break;
1627 case MVT::f64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001628 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001629 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001630 }
1631 break;
1632 case NVPTXISD::LDUV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001633 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001634 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001635 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001636 case MVT::i8:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001637 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001638 break;
1639 case MVT::i16:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001640 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001641 break;
1642 case MVT::i32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001643 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001644 break;
1645 case MVT::i64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001646 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001647 break;
1648 case MVT::f32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001649 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001650 break;
1651 case MVT::f64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001652 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1653 break;
1654 }
1655 break;
1656 case NVPTXISD::LDGV4:
1657 switch (EltVT.getSimpleVT().SimpleTy) {
1658 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001659 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001660 case MVT::i8:
1661 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1662 break;
1663 case MVT::i16:
1664 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1665 break;
1666 case MVT::i32:
1667 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1668 break;
1669 case MVT::f32:
1670 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001671 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001672 }
1673 break;
1674 case NVPTXISD::LDUV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001675 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001676 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001677 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001678 case MVT::i8:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001679 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001680 break;
1681 case MVT::i16:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001682 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001683 break;
1684 case MVT::i32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001685 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001686 break;
1687 case MVT::f32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001688 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001689 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001690 }
1691 break;
1692 }
Justin Holewinskie40e9292013-07-01 12:58:52 +00001693
1694 SDValue Ops[] = { Addr, Chain };
Justin Holewinskic7997922016-04-05 12:38:01 +00001695 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001696 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1697 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1698 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001699 switch (N->getOpcode()) {
1700 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001701 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001702 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001703 case ISD::INTRINSIC_W_CHAIN:
1704 if (IsLDG) {
1705 switch (EltVT.getSimpleVT().SimpleTy) {
1706 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001707 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001708 case MVT::i8:
1709 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1710 break;
1711 case MVT::i16:
1712 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1713 break;
1714 case MVT::i32:
1715 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1716 break;
1717 case MVT::i64:
1718 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1719 break;
1720 case MVT::f32:
1721 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1722 break;
1723 case MVT::f64:
1724 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1725 break;
1726 }
1727 } else {
1728 switch (EltVT.getSimpleVT().SimpleTy) {
1729 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001730 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001731 case MVT::i8:
1732 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1733 break;
1734 case MVT::i16:
1735 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1736 break;
1737 case MVT::i32:
1738 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1739 break;
1740 case MVT::i64:
1741 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1742 break;
1743 case MVT::f32:
1744 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1745 break;
1746 case MVT::f64:
1747 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1748 break;
1749 }
1750 }
1751 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001752 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001753 case NVPTXISD::LDGV2:
1754 switch (EltVT.getSimpleVT().SimpleTy) {
1755 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001756 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001757 case MVT::i8:
1758 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1759 break;
1760 case MVT::i16:
1761 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1762 break;
1763 case MVT::i32:
1764 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1765 break;
1766 case MVT::i64:
1767 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1768 break;
1769 case MVT::f32:
1770 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1771 break;
1772 case MVT::f64:
1773 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1774 break;
1775 }
1776 break;
1777 case NVPTXISD::LDUV2:
1778 switch (EltVT.getSimpleVT().SimpleTy) {
1779 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001780 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001781 case MVT::i8:
1782 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1783 break;
1784 case MVT::i16:
1785 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1786 break;
1787 case MVT::i32:
1788 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1789 break;
1790 case MVT::i64:
1791 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1792 break;
1793 case MVT::f32:
1794 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1795 break;
1796 case MVT::f64:
1797 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1798 break;
1799 }
1800 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001801 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001802 case NVPTXISD::LDGV4:
1803 switch (EltVT.getSimpleVT().SimpleTy) {
1804 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001805 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001806 case MVT::i8:
1807 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1808 break;
1809 case MVT::i16:
1810 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1811 break;
1812 case MVT::i32:
1813 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1814 break;
1815 case MVT::f32:
1816 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1817 break;
1818 }
1819 break;
1820 case NVPTXISD::LDUV4:
1821 switch (EltVT.getSimpleVT().SimpleTy) {
1822 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001823 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001824 case MVT::i8:
1825 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1826 break;
1827 case MVT::i16:
1828 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1829 break;
1830 case MVT::i32:
1831 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1832 break;
1833 case MVT::f32:
1834 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1835 break;
1836 }
1837 break;
1838 }
1839 } else {
1840 switch (N->getOpcode()) {
1841 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001842 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001843 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001844 case ISD::INTRINSIC_W_CHAIN:
1845 if (IsLDG) {
1846 switch (EltVT.getSimpleVT().SimpleTy) {
1847 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001848 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001849 case MVT::i8:
1850 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1851 break;
1852 case MVT::i16:
1853 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1854 break;
1855 case MVT::i32:
1856 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1857 break;
1858 case MVT::i64:
1859 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1860 break;
1861 case MVT::f32:
1862 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1863 break;
1864 case MVT::f64:
1865 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1866 break;
1867 }
1868 } else {
1869 switch (EltVT.getSimpleVT().SimpleTy) {
1870 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001871 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001872 case MVT::i8:
1873 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1874 break;
1875 case MVT::i16:
1876 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1877 break;
1878 case MVT::i32:
1879 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1880 break;
1881 case MVT::i64:
1882 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1883 break;
1884 case MVT::f32:
1885 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1886 break;
1887 case MVT::f64:
1888 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1889 break;
1890 }
1891 }
1892 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001893 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001894 case NVPTXISD::LDGV2:
1895 switch (EltVT.getSimpleVT().SimpleTy) {
1896 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001897 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001898 case MVT::i8:
1899 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1900 break;
1901 case MVT::i16:
1902 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1903 break;
1904 case MVT::i32:
1905 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1906 break;
1907 case MVT::i64:
1908 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1909 break;
1910 case MVT::f32:
1911 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1912 break;
1913 case MVT::f64:
1914 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1915 break;
1916 }
1917 break;
1918 case NVPTXISD::LDUV2:
1919 switch (EltVT.getSimpleVT().SimpleTy) {
1920 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001921 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001922 case MVT::i8:
1923 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1924 break;
1925 case MVT::i16:
1926 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1927 break;
1928 case MVT::i32:
1929 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1930 break;
1931 case MVT::i64:
1932 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1933 break;
1934 case MVT::f32:
1935 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1936 break;
1937 case MVT::f64:
1938 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1939 break;
1940 }
1941 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001942 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001943 case NVPTXISD::LDGV4:
1944 switch (EltVT.getSimpleVT().SimpleTy) {
1945 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001946 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001947 case MVT::i8:
1948 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1949 break;
1950 case MVT::i16:
1951 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1952 break;
1953 case MVT::i32:
1954 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1955 break;
1956 case MVT::f32:
1957 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1958 break;
1959 }
1960 break;
1961 case NVPTXISD::LDUV4:
1962 switch (EltVT.getSimpleVT().SimpleTy) {
1963 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001964 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001965 case MVT::i8:
1966 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1967 break;
1968 case MVT::i16:
1969 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1970 break;
1971 case MVT::i32:
1972 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1973 break;
1974 case MVT::f32:
1975 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1976 break;
1977 }
1978 break;
1979 }
1980 }
1981
1982 SDValue Ops[] = { Base, Offset, Chain };
1983
Justin Holewinskic7997922016-04-05 12:38:01 +00001984 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001985 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001986 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001987 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001988 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001989 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001990 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001991 case ISD::INTRINSIC_W_CHAIN:
1992 if (IsLDG) {
1993 switch (EltVT.getSimpleVT().SimpleTy) {
1994 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001995 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001996 case MVT::i8:
1997 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1998 break;
1999 case MVT::i16:
2000 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
2001 break;
2002 case MVT::i32:
2003 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
2004 break;
2005 case MVT::i64:
2006 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
2007 break;
2008 case MVT::f32:
2009 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
2010 break;
2011 case MVT::f64:
2012 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
2013 break;
2014 }
2015 } else {
2016 switch (EltVT.getSimpleVT().SimpleTy) {
2017 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002018 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00002019 case MVT::i8:
2020 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
2021 break;
2022 case MVT::i16:
2023 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
2024 break;
2025 case MVT::i32:
2026 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
2027 break;
2028 case MVT::i64:
2029 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
2030 break;
2031 case MVT::f32:
2032 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
2033 break;
2034 case MVT::f64:
2035 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
2036 break;
2037 }
2038 }
2039 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00002040 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00002041 case NVPTXISD::LDGV2:
2042 switch (EltVT.getSimpleVT().SimpleTy) {
2043 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002044 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002045 case MVT::i8:
2046 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
2047 break;
2048 case MVT::i16:
2049 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
2050 break;
2051 case MVT::i32:
2052 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
2053 break;
2054 case MVT::i64:
2055 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
2056 break;
2057 case MVT::f32:
2058 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
2059 break;
2060 case MVT::f64:
2061 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
2062 break;
2063 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00002064 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002065 case NVPTXISD::LDUV2:
2066 switch (EltVT.getSimpleVT().SimpleTy) {
2067 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002068 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002069 case MVT::i8:
2070 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
2071 break;
2072 case MVT::i16:
2073 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
2074 break;
2075 case MVT::i32:
2076 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
2077 break;
2078 case MVT::i64:
2079 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
2080 break;
2081 case MVT::f32:
2082 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
2083 break;
2084 case MVT::f64:
2085 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
2086 break;
2087 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00002088 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00002089 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00002090 case NVPTXISD::LDGV4:
2091 switch (EltVT.getSimpleVT().SimpleTy) {
2092 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002093 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002094 case MVT::i8:
2095 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
2096 break;
2097 case MVT::i16:
2098 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
2099 break;
2100 case MVT::i32:
2101 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
2102 break;
2103 case MVT::f32:
2104 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
2105 break;
2106 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00002107 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002108 case NVPTXISD::LDUV4:
2109 switch (EltVT.getSimpleVT().SimpleTy) {
2110 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002111 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002112 case MVT::i8:
2113 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
2114 break;
2115 case MVT::i16:
2116 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
2117 break;
2118 case MVT::i32:
2119 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
2120 break;
2121 case MVT::f32:
2122 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
2123 break;
2124 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00002125 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002126 }
Justin Holewinskie40e9292013-07-01 12:58:52 +00002127 } else {
2128 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002129 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002130 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00002131 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00002132 case ISD::INTRINSIC_W_CHAIN:
2133 if (IsLDG) {
2134 switch (EltVT.getSimpleVT().SimpleTy) {
2135 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002136 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00002137 case MVT::i8:
2138 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
2139 break;
2140 case MVT::i16:
2141 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
2142 break;
2143 case MVT::i32:
2144 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
2145 break;
2146 case MVT::i64:
2147 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
2148 break;
2149 case MVT::f32:
2150 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
2151 break;
2152 case MVT::f64:
2153 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
2154 break;
2155 }
2156 } else {
2157 switch (EltVT.getSimpleVT().SimpleTy) {
2158 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002159 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00002160 case MVT::i8:
2161 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
2162 break;
2163 case MVT::i16:
2164 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
2165 break;
2166 case MVT::i32:
2167 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
2168 break;
2169 case MVT::i64:
2170 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
2171 break;
2172 case MVT::f32:
2173 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
2174 break;
2175 case MVT::f64:
2176 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
2177 break;
2178 }
2179 }
2180 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00002181 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00002182 case NVPTXISD::LDGV2:
2183 switch (EltVT.getSimpleVT().SimpleTy) {
2184 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002185 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002186 case MVT::i8:
2187 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
2188 break;
2189 case MVT::i16:
2190 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
2191 break;
2192 case MVT::i32:
2193 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
2194 break;
2195 case MVT::i64:
2196 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
2197 break;
2198 case MVT::f32:
2199 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
2200 break;
2201 case MVT::f64:
2202 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
2203 break;
2204 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00002205 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002206 case NVPTXISD::LDUV2:
2207 switch (EltVT.getSimpleVT().SimpleTy) {
2208 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002209 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002210 case MVT::i8:
2211 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
2212 break;
2213 case MVT::i16:
2214 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
2215 break;
2216 case MVT::i32:
2217 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
2218 break;
2219 case MVT::i64:
2220 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
2221 break;
2222 case MVT::f32:
2223 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
2224 break;
2225 case MVT::f64:
2226 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
2227 break;
2228 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00002229 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00002230 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00002231 case NVPTXISD::LDGV4:
2232 switch (EltVT.getSimpleVT().SimpleTy) {
2233 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002234 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002235 case MVT::i8:
2236 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
2237 break;
2238 case MVT::i16:
2239 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
2240 break;
2241 case MVT::i32:
2242 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
2243 break;
2244 case MVT::f32:
2245 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
2246 break;
2247 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00002248 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002249 case NVPTXISD::LDUV4:
2250 switch (EltVT.getSimpleVT().SimpleTy) {
2251 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002252 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002253 case MVT::i8:
2254 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
2255 break;
2256 case MVT::i16:
2257 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
2258 break;
2259 case MVT::i32:
2260 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
2261 break;
2262 case MVT::f32:
2263 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
2264 break;
2265 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00002266 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002267 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002268 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002269
Justin Holewinskie40e9292013-07-01 12:58:52 +00002270 SDValue Ops[] = { Op1, Chain };
Justin Holewinskic7997922016-04-05 12:38:01 +00002271 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
Justin Holewinskie40e9292013-07-01 12:58:52 +00002272 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002273
2274 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00002275 MemRefs0[0] = Mem->getMemOperand();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002276 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
2277
Justin Holewinskic7997922016-04-05 12:38:01 +00002278 // For automatic generation of LDG (through SelectLoad[Vector], not the
2279 // intrinsics), we may have an extending load like:
2280 //
2281 // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
2282 //
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002283 // In this case, the matching logic above will select a load for the original
2284 // memory type (in this case, i8) and our types will not match (the node needs
2285 // to return an i32 in this case). Our LDG/LDU nodes do not support the
2286 // concept of sign-/zero-extension, so emulate it here by adding an explicit
2287 // CVT instruction. Ptxas should clean up any redundancies here.
2288
Justin Holewinskic7997922016-04-05 12:38:01 +00002289 EVT OrigType = N->getValueType(0);
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002290 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
Justin Holewinskic7997922016-04-05 12:38:01 +00002291
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002292 if (OrigType != EltVT && LdNode) {
2293 // We have an extending-load. The instruction we selected operates on the
2294 // smaller type, but the SDNode we are replacing has the larger type. We
2295 // need to emit a CVT to make the types match.
2296 bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
2297 unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
2298 EltVT.getSimpleVT(), IsSigned);
Justin Holewinskic7997922016-04-05 12:38:01 +00002299
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002300 // For each output value, apply the manual sign/zero-extension and make sure
2301 // all users of the load go through that CVT.
Justin Holewinskic7997922016-04-05 12:38:01 +00002302 for (unsigned i = 0; i != NumElts; ++i) {
2303 SDValue Res(LD, i);
2304 SDValue OrigVal(N, i);
2305
2306 SDNode *CvtNode =
2307 CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002308 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2309 DL, MVT::i32));
Justin Holewinskic7997922016-04-05 12:38:01 +00002310 ReplaceUses(OrigVal, SDValue(CvtNode, 0));
2311 }
2312 }
2313
Justin Bogner8d83fb62016-05-13 21:12:53 +00002314 ReplaceNode(N, LD);
2315 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002316}
2317
Justin Bogner8d83fb62016-05-13 21:12:53 +00002318bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00002319 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002320 StoreSDNode *ST = cast<StoreSDNode>(N);
2321 EVT StoreVT = ST->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +00002322 SDNode *NVPTXST = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002323
2324 // do not support pre/post inc/dec
2325 if (ST->isIndexed())
Justin Bogner8d83fb62016-05-13 21:12:53 +00002326 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002327
2328 if (!StoreVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +00002329 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002330
2331 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00002332 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002333
2334 // Volatile Setting
2335 // - .volatile is only availalble for .global and .shared
2336 bool isVolatile = ST->isVolatile();
2337 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2338 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2339 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2340 isVolatile = false;
2341
2342 // Vector Setting
2343 MVT SimpleVT = StoreVT.getSimpleVT();
2344 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002345
2346 // Type Setting: toType + toTypeWidth
2347 // - for integer type, always use 'u'
2348 //
2349 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00002350 unsigned toTypeWidth = ScalarVT.getSizeInBits();
Artem Belevich620db1f2017-02-23 22:38:24 +00002351 if (SimpleVT.isVector()) {
2352 assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
2353 // v2f16 is stored using st.b32
2354 toTypeWidth = 32;
2355 }
2356
Justin Holewinskiae556d32012-05-04 20:18:50 +00002357 unsigned int toType;
2358 if (ScalarVT.isFloatingPoint())
Artem Belevich64dc9be2017-01-13 20:56:17 +00002359 // f16 uses .b16 as its storage type.
2360 toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
2361 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002362 else
2363 toType = NVPTX::PTXLdStInstCode::Unsigned;
2364
2365 // Create the machine instruction DAG
2366 SDValue Chain = N->getOperand(0);
2367 SDValue N1 = N->getOperand(1);
2368 SDValue N2 = N->getOperand(2);
2369 SDValue Addr;
2370 SDValue Offset, Base;
2371 unsigned Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +00002372 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002373
2374 if (SelectDirectAddr(N2, Addr)) {
2375 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002376 case MVT::i8:
2377 Opcode = NVPTX::ST_i8_avar;
2378 break;
2379 case MVT::i16:
2380 Opcode = NVPTX::ST_i16_avar;
2381 break;
2382 case MVT::i32:
2383 Opcode = NVPTX::ST_i32_avar;
2384 break;
2385 case MVT::i64:
2386 Opcode = NVPTX::ST_i64_avar;
2387 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +00002388 case MVT::f16:
2389 Opcode = NVPTX::ST_f16_avar;
2390 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002391 case MVT::v2f16:
2392 Opcode = NVPTX::ST_f16x2_avar;
2393 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002394 case MVT::f32:
2395 Opcode = NVPTX::ST_f32_avar;
2396 break;
2397 case MVT::f64:
2398 Opcode = NVPTX::ST_f64_avar;
2399 break;
2400 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002401 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002402 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002403 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2404 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2405 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2406 Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00002407 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00002408 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2409 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00002410 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002411 case MVT::i8:
2412 Opcode = NVPTX::ST_i8_asi;
2413 break;
2414 case MVT::i16:
2415 Opcode = NVPTX::ST_i16_asi;
2416 break;
2417 case MVT::i32:
2418 Opcode = NVPTX::ST_i32_asi;
2419 break;
2420 case MVT::i64:
2421 Opcode = NVPTX::ST_i64_asi;
2422 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +00002423 case MVT::f16:
2424 Opcode = NVPTX::ST_f16_asi;
2425 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002426 case MVT::v2f16:
2427 Opcode = NVPTX::ST_f16x2_asi;
2428 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002429 case MVT::f32:
2430 Opcode = NVPTX::ST_f32_asi;
2431 break;
2432 case MVT::f64:
2433 Opcode = NVPTX::ST_f64_asi;
2434 break;
2435 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002436 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002437 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002438 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2439 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2440 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2441 Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00002442 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00002443 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2444 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2445 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002446 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002447 case MVT::i8:
2448 Opcode = NVPTX::ST_i8_ari_64;
2449 break;
2450 case MVT::i16:
2451 Opcode = NVPTX::ST_i16_ari_64;
2452 break;
2453 case MVT::i32:
2454 Opcode = NVPTX::ST_i32_ari_64;
2455 break;
2456 case MVT::i64:
2457 Opcode = NVPTX::ST_i64_ari_64;
2458 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +00002459 case MVT::f16:
2460 Opcode = NVPTX::ST_f16_ari_64;
2461 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002462 case MVT::v2f16:
2463 Opcode = NVPTX::ST_f16x2_ari_64;
2464 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002465 case MVT::f32:
2466 Opcode = NVPTX::ST_f32_ari_64;
2467 break;
2468 case MVT::f64:
2469 Opcode = NVPTX::ST_f64_ari_64;
2470 break;
2471 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002472 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002473 }
2474 } else {
2475 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002476 case MVT::i8:
2477 Opcode = NVPTX::ST_i8_ari;
2478 break;
2479 case MVT::i16:
2480 Opcode = NVPTX::ST_i16_ari;
2481 break;
2482 case MVT::i32:
2483 Opcode = NVPTX::ST_i32_ari;
2484 break;
2485 case MVT::i64:
2486 Opcode = NVPTX::ST_i64_ari;
2487 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +00002488 case MVT::f16:
2489 Opcode = NVPTX::ST_f16_ari;
2490 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002491 case MVT::v2f16:
2492 Opcode = NVPTX::ST_f16x2_ari;
2493 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002494 case MVT::f32:
2495 Opcode = NVPTX::ST_f32_ari;
2496 break;
2497 case MVT::f64:
2498 Opcode = NVPTX::ST_f64_ari;
2499 break;
2500 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002501 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002502 }
Justin Holewinskiae556d32012-05-04 20:18:50 +00002503 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002504 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2505 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2506 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2507 Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00002508 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002509 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00002510 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002511 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002512 case MVT::i8:
2513 Opcode = NVPTX::ST_i8_areg_64;
2514 break;
2515 case MVT::i16:
2516 Opcode = NVPTX::ST_i16_areg_64;
2517 break;
2518 case MVT::i32:
2519 Opcode = NVPTX::ST_i32_areg_64;
2520 break;
2521 case MVT::i64:
2522 Opcode = NVPTX::ST_i64_areg_64;
2523 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +00002524 case MVT::f16:
2525 Opcode = NVPTX::ST_f16_areg_64;
2526 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002527 case MVT::v2f16:
2528 Opcode = NVPTX::ST_f16x2_areg_64;
2529 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002530 case MVT::f32:
2531 Opcode = NVPTX::ST_f32_areg_64;
2532 break;
2533 case MVT::f64:
2534 Opcode = NVPTX::ST_f64_areg_64;
2535 break;
2536 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002537 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002538 }
2539 } else {
2540 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002541 case MVT::i8:
2542 Opcode = NVPTX::ST_i8_areg;
2543 break;
2544 case MVT::i16:
2545 Opcode = NVPTX::ST_i16_areg;
2546 break;
2547 case MVT::i32:
2548 Opcode = NVPTX::ST_i32_areg;
2549 break;
2550 case MVT::i64:
2551 Opcode = NVPTX::ST_i64_areg;
2552 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +00002553 case MVT::f16:
2554 Opcode = NVPTX::ST_f16_areg;
2555 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002556 case MVT::v2f16:
2557 Opcode = NVPTX::ST_f16x2_areg;
2558 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002559 case MVT::f32:
2560 Opcode = NVPTX::ST_f32_areg;
2561 break;
2562 case MVT::f64:
2563 Opcode = NVPTX::ST_f64_areg;
2564 break;
2565 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002566 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002567 }
Justin Holewinskiae556d32012-05-04 20:18:50 +00002568 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002569 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2570 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2571 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2572 Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00002573 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002574 }
2575
Justin Bogner8d83fb62016-05-13 21:12:53 +00002576 if (!NVPTXST)
2577 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002578
Justin Bogner8d83fb62016-05-13 21:12:53 +00002579 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2580 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2581 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2582 ReplaceNode(N, NVPTXST);
2583 return true;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002584}
2585
Justin Bogner8d83fb62016-05-13 21:12:53 +00002586bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002587 SDValue Chain = N->getOperand(0);
2588 SDValue Op1 = N->getOperand(1);
2589 SDValue Addr, Offset, Base;
2590 unsigned Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00002591 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002592 SDNode *ST;
2593 EVT EltVT = Op1.getValueType();
2594 MemSDNode *MemSD = cast<MemSDNode>(N);
2595 EVT StoreVT = MemSD->getMemoryVT();
2596
2597 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00002598 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002599
2600 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2601 report_fatal_error("Cannot store to pointer that points to constant "
2602 "memory space");
2603 }
2604
2605 // Volatile Setting
2606 // - .volatile is only availalble for .global and .shared
2607 bool IsVolatile = MemSD->isVolatile();
2608 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2609 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2610 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2611 IsVolatile = false;
2612
2613 // Type Setting: toType + toTypeWidth
2614 // - for integer type, always use 'u'
2615 assert(StoreVT.isSimple() && "Store value is not simple");
2616 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00002617 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002618 unsigned ToType;
2619 if (ScalarVT.isFloatingPoint())
Artem Belevich620db1f2017-02-23 22:38:24 +00002620 ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
2621 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002622 else
2623 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2624
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002625 SmallVector<SDValue, 12> StOps;
2626 SDValue N2;
2627 unsigned VecType;
2628
2629 switch (N->getOpcode()) {
2630 case NVPTXISD::StoreV2:
2631 VecType = NVPTX::PTXLdStInstCode::V2;
2632 StOps.push_back(N->getOperand(1));
2633 StOps.push_back(N->getOperand(2));
2634 N2 = N->getOperand(3);
2635 break;
2636 case NVPTXISD::StoreV4:
2637 VecType = NVPTX::PTXLdStInstCode::V4;
2638 StOps.push_back(N->getOperand(1));
2639 StOps.push_back(N->getOperand(2));
2640 StOps.push_back(N->getOperand(3));
2641 StOps.push_back(N->getOperand(4));
2642 N2 = N->getOperand(5);
2643 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002644 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002645 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002646 }
2647
Artem Belevich620db1f2017-02-23 22:38:24 +00002648 // v8f16 is a special case. PTX doesn't have st.v8.f16
2649 // instruction. Instead, we split the vector into v2f16 chunks and
2650 // store them with st.v4.b32.
2651 if (EltVT == MVT::v2f16) {
2652 assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
2653 EltVT = MVT::i32;
2654 ToType = NVPTX::PTXLdStInstCode::Untyped;
2655 ToTypeWidth = 32;
2656 }
2657
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002658 StOps.push_back(getI32Imm(IsVolatile, DL));
2659 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2660 StOps.push_back(getI32Imm(VecType, DL));
2661 StOps.push_back(getI32Imm(ToType, DL));
2662 StOps.push_back(getI32Imm(ToTypeWidth, DL));
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002663
2664 if (SelectDirectAddr(N2, Addr)) {
2665 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002666 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002667 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002668 case NVPTXISD::StoreV2:
2669 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002670 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002671 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002672 case MVT::i8:
2673 Opcode = NVPTX::STV_i8_v2_avar;
2674 break;
2675 case MVT::i16:
2676 Opcode = NVPTX::STV_i16_v2_avar;
2677 break;
2678 case MVT::i32:
2679 Opcode = NVPTX::STV_i32_v2_avar;
2680 break;
2681 case MVT::i64:
2682 Opcode = NVPTX::STV_i64_v2_avar;
2683 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002684 case MVT::f16:
2685 Opcode = NVPTX::STV_f16_v2_avar;
2686 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002687 case MVT::f32:
2688 Opcode = NVPTX::STV_f32_v2_avar;
2689 break;
2690 case MVT::f64:
2691 Opcode = NVPTX::STV_f64_v2_avar;
2692 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002693 }
2694 break;
2695 case NVPTXISD::StoreV4:
2696 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002697 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002698 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002699 case MVT::i8:
2700 Opcode = NVPTX::STV_i8_v4_avar;
2701 break;
2702 case MVT::i16:
2703 Opcode = NVPTX::STV_i16_v4_avar;
2704 break;
2705 case MVT::i32:
2706 Opcode = NVPTX::STV_i32_v4_avar;
2707 break;
2708 case MVT::f32:
2709 Opcode = NVPTX::STV_f32_v4_avar;
2710 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002711 }
2712 break;
2713 }
2714 StOps.push_back(Addr);
Eric Christopher02389e32015-02-19 00:08:27 +00002715 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2716 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002717 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002718 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002719 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002720 case NVPTXISD::StoreV2:
2721 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002722 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002723 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002724 case MVT::i8:
2725 Opcode = NVPTX::STV_i8_v2_asi;
2726 break;
2727 case MVT::i16:
2728 Opcode = NVPTX::STV_i16_v2_asi;
2729 break;
2730 case MVT::i32:
2731 Opcode = NVPTX::STV_i32_v2_asi;
2732 break;
2733 case MVT::i64:
2734 Opcode = NVPTX::STV_i64_v2_asi;
2735 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002736 case MVT::f16:
2737 Opcode = NVPTX::STV_f16_v2_asi;
2738 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002739 case MVT::f32:
2740 Opcode = NVPTX::STV_f32_v2_asi;
2741 break;
2742 case MVT::f64:
2743 Opcode = NVPTX::STV_f64_v2_asi;
2744 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002745 }
2746 break;
2747 case NVPTXISD::StoreV4:
2748 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002749 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002750 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002751 case MVT::i8:
2752 Opcode = NVPTX::STV_i8_v4_asi;
2753 break;
2754 case MVT::i16:
2755 Opcode = NVPTX::STV_i16_v4_asi;
2756 break;
2757 case MVT::i32:
2758 Opcode = NVPTX::STV_i32_v4_asi;
2759 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002760 case MVT::f16:
2761 Opcode = NVPTX::STV_f16_v4_asi;
2762 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002763 case MVT::f32:
2764 Opcode = NVPTX::STV_f32_v4_asi;
2765 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002766 }
2767 break;
2768 }
2769 StOps.push_back(Base);
2770 StOps.push_back(Offset);
Eric Christopher02389e32015-02-19 00:08:27 +00002771 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2772 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2773 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002774 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002775 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002776 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002777 case NVPTXISD::StoreV2:
2778 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002779 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002780 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002781 case MVT::i8:
2782 Opcode = NVPTX::STV_i8_v2_ari_64;
2783 break;
2784 case MVT::i16:
2785 Opcode = NVPTX::STV_i16_v2_ari_64;
2786 break;
2787 case MVT::i32:
2788 Opcode = NVPTX::STV_i32_v2_ari_64;
2789 break;
2790 case MVT::i64:
2791 Opcode = NVPTX::STV_i64_v2_ari_64;
2792 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002793 case MVT::f16:
2794 Opcode = NVPTX::STV_f16_v2_ari_64;
2795 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002796 case MVT::f32:
2797 Opcode = NVPTX::STV_f32_v2_ari_64;
2798 break;
2799 case MVT::f64:
2800 Opcode = NVPTX::STV_f64_v2_ari_64;
2801 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002802 }
2803 break;
2804 case NVPTXISD::StoreV4:
2805 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002806 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002807 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002808 case MVT::i8:
2809 Opcode = NVPTX::STV_i8_v4_ari_64;
2810 break;
2811 case MVT::i16:
2812 Opcode = NVPTX::STV_i16_v4_ari_64;
2813 break;
2814 case MVT::i32:
2815 Opcode = NVPTX::STV_i32_v4_ari_64;
2816 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002817 case MVT::f16:
2818 Opcode = NVPTX::STV_f16_v4_ari_64;
2819 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002820 case MVT::f32:
2821 Opcode = NVPTX::STV_f32_v4_ari_64;
2822 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002823 }
2824 break;
2825 }
2826 } else {
2827 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002828 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002829 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002830 case NVPTXISD::StoreV2:
2831 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002832 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002833 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002834 case MVT::i8:
2835 Opcode = NVPTX::STV_i8_v2_ari;
2836 break;
2837 case MVT::i16:
2838 Opcode = NVPTX::STV_i16_v2_ari;
2839 break;
2840 case MVT::i32:
2841 Opcode = NVPTX::STV_i32_v2_ari;
2842 break;
2843 case MVT::i64:
2844 Opcode = NVPTX::STV_i64_v2_ari;
2845 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002846 case MVT::f16:
2847 Opcode = NVPTX::STV_f16_v2_ari;
2848 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002849 case MVT::f32:
2850 Opcode = NVPTX::STV_f32_v2_ari;
2851 break;
2852 case MVT::f64:
2853 Opcode = NVPTX::STV_f64_v2_ari;
2854 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002855 }
2856 break;
2857 case NVPTXISD::StoreV4:
2858 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002859 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002860 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002861 case MVT::i8:
2862 Opcode = NVPTX::STV_i8_v4_ari;
2863 break;
2864 case MVT::i16:
2865 Opcode = NVPTX::STV_i16_v4_ari;
2866 break;
2867 case MVT::i32:
2868 Opcode = NVPTX::STV_i32_v4_ari;
2869 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002870 case MVT::f16:
2871 Opcode = NVPTX::STV_f16_v4_ari;
2872 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002873 case MVT::f32:
2874 Opcode = NVPTX::STV_f32_v4_ari;
2875 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002876 }
2877 break;
2878 }
2879 }
2880 StOps.push_back(Base);
2881 StOps.push_back(Offset);
2882 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00002883 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002884 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002885 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002886 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002887 case NVPTXISD::StoreV2:
2888 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002889 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002890 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002891 case MVT::i8:
2892 Opcode = NVPTX::STV_i8_v2_areg_64;
2893 break;
2894 case MVT::i16:
2895 Opcode = NVPTX::STV_i16_v2_areg_64;
2896 break;
2897 case MVT::i32:
2898 Opcode = NVPTX::STV_i32_v2_areg_64;
2899 break;
2900 case MVT::i64:
2901 Opcode = NVPTX::STV_i64_v2_areg_64;
2902 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002903 case MVT::f16:
2904 Opcode = NVPTX::STV_f16_v2_areg_64;
2905 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002906 case MVT::f32:
2907 Opcode = NVPTX::STV_f32_v2_areg_64;
2908 break;
2909 case MVT::f64:
2910 Opcode = NVPTX::STV_f64_v2_areg_64;
2911 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002912 }
2913 break;
2914 case NVPTXISD::StoreV4:
2915 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002916 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002917 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002918 case MVT::i8:
2919 Opcode = NVPTX::STV_i8_v4_areg_64;
2920 break;
2921 case MVT::i16:
2922 Opcode = NVPTX::STV_i16_v4_areg_64;
2923 break;
2924 case MVT::i32:
2925 Opcode = NVPTX::STV_i32_v4_areg_64;
2926 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002927 case MVT::f16:
2928 Opcode = NVPTX::STV_f16_v4_areg_64;
2929 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002930 case MVT::f32:
2931 Opcode = NVPTX::STV_f32_v4_areg_64;
2932 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002933 }
2934 break;
2935 }
2936 } else {
2937 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002938 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002939 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002940 case NVPTXISD::StoreV2:
2941 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002942 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002943 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002944 case MVT::i8:
2945 Opcode = NVPTX::STV_i8_v2_areg;
2946 break;
2947 case MVT::i16:
2948 Opcode = NVPTX::STV_i16_v2_areg;
2949 break;
2950 case MVT::i32:
2951 Opcode = NVPTX::STV_i32_v2_areg;
2952 break;
2953 case MVT::i64:
2954 Opcode = NVPTX::STV_i64_v2_areg;
2955 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002956 case MVT::f16:
2957 Opcode = NVPTX::STV_f16_v2_areg;
2958 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002959 case MVT::f32:
2960 Opcode = NVPTX::STV_f32_v2_areg;
2961 break;
2962 case MVT::f64:
2963 Opcode = NVPTX::STV_f64_v2_areg;
2964 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002965 }
2966 break;
2967 case NVPTXISD::StoreV4:
2968 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002969 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002970 return false;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002971 case MVT::i8:
2972 Opcode = NVPTX::STV_i8_v4_areg;
2973 break;
2974 case MVT::i16:
2975 Opcode = NVPTX::STV_i16_v4_areg;
2976 break;
2977 case MVT::i32:
2978 Opcode = NVPTX::STV_i32_v4_areg;
2979 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00002980 case MVT::f16:
2981 Opcode = NVPTX::STV_f16_v4_areg;
2982 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002983 case MVT::f32:
2984 Opcode = NVPTX::STV_f32_v4_areg;
2985 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002986 }
2987 break;
2988 }
2989 }
2990 StOps.push_back(N2);
2991 }
2992
2993 StOps.push_back(Chain);
2994
Michael Liaob53d8962013-04-19 22:22:57 +00002995 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002996
2997 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2998 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2999 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
3000
Justin Bogner8d83fb62016-05-13 21:12:53 +00003001 ReplaceNode(N, ST);
3002 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00003003}
3004
Justin Bogner8d83fb62016-05-13 21:12:53 +00003005bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00003006 SDValue Chain = Node->getOperand(0);
3007 SDValue Offset = Node->getOperand(2);
3008 SDValue Flag = Node->getOperand(3);
3009 SDLoc DL(Node);
3010 MemSDNode *Mem = cast<MemSDNode>(Node);
3011
3012 unsigned VecSize;
3013 switch (Node->getOpcode()) {
3014 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003015 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003016 case NVPTXISD::LoadParam:
3017 VecSize = 1;
3018 break;
3019 case NVPTXISD::LoadParamV2:
3020 VecSize = 2;
3021 break;
3022 case NVPTXISD::LoadParamV4:
3023 VecSize = 4;
3024 break;
3025 }
3026
3027 EVT EltVT = Node->getValueType(0);
3028 EVT MemVT = Mem->getMemoryVT();
3029
3030 unsigned Opc = 0;
3031
3032 switch (VecSize) {
3033 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003034 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003035 case 1:
3036 switch (MemVT.getSimpleVT().SimpleTy) {
3037 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003038 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003039 case MVT::i1:
3040 Opc = NVPTX::LoadParamMemI8;
3041 break;
3042 case MVT::i8:
3043 Opc = NVPTX::LoadParamMemI8;
3044 break;
3045 case MVT::i16:
3046 Opc = NVPTX::LoadParamMemI16;
3047 break;
3048 case MVT::i32:
3049 Opc = NVPTX::LoadParamMemI32;
3050 break;
3051 case MVT::i64:
3052 Opc = NVPTX::LoadParamMemI64;
3053 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +00003054 case MVT::f16:
3055 Opc = NVPTX::LoadParamMemF16;
3056 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00003057 case MVT::v2f16:
3058 Opc = NVPTX::LoadParamMemF16x2;
3059 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003060 case MVT::f32:
3061 Opc = NVPTX::LoadParamMemF32;
3062 break;
3063 case MVT::f64:
3064 Opc = NVPTX::LoadParamMemF64;
3065 break;
3066 }
3067 break;
3068 case 2:
3069 switch (MemVT.getSimpleVT().SimpleTy) {
3070 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003071 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003072 case MVT::i1:
3073 Opc = NVPTX::LoadParamMemV2I8;
3074 break;
3075 case MVT::i8:
3076 Opc = NVPTX::LoadParamMemV2I8;
3077 break;
3078 case MVT::i16:
3079 Opc = NVPTX::LoadParamMemV2I16;
3080 break;
3081 case MVT::i32:
3082 Opc = NVPTX::LoadParamMemV2I32;
3083 break;
3084 case MVT::i64:
3085 Opc = NVPTX::LoadParamMemV2I64;
3086 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00003087 case MVT::f16:
3088 Opc = NVPTX::LoadParamMemV2F16;
3089 break;
3090 case MVT::v2f16:
3091 Opc = NVPTX::LoadParamMemV2F16x2;
3092 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003093 case MVT::f32:
3094 Opc = NVPTX::LoadParamMemV2F32;
3095 break;
3096 case MVT::f64:
3097 Opc = NVPTX::LoadParamMemV2F64;
3098 break;
3099 }
3100 break;
3101 case 4:
3102 switch (MemVT.getSimpleVT().SimpleTy) {
3103 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003104 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003105 case MVT::i1:
3106 Opc = NVPTX::LoadParamMemV4I8;
3107 break;
3108 case MVT::i8:
3109 Opc = NVPTX::LoadParamMemV4I8;
3110 break;
3111 case MVT::i16:
3112 Opc = NVPTX::LoadParamMemV4I16;
3113 break;
3114 case MVT::i32:
3115 Opc = NVPTX::LoadParamMemV4I32;
3116 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00003117 case MVT::f16:
3118 Opc = NVPTX::LoadParamMemV4F16;
3119 break;
3120 case MVT::v2f16:
3121 Opc = NVPTX::LoadParamMemV4F16x2;
3122 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003123 case MVT::f32:
3124 Opc = NVPTX::LoadParamMemV4F32;
3125 break;
3126 }
3127 break;
3128 }
3129
3130 SDVTList VTs;
3131 if (VecSize == 1) {
3132 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
3133 } else if (VecSize == 2) {
3134 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
3135 } else {
3136 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
Craig Topperabb4ac72014-04-16 06:10:51 +00003137 VTs = CurDAG->getVTList(EVTs);
Justin Holewinskif8f70912013-06-28 17:57:59 +00003138 }
3139
3140 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
3141
3142 SmallVector<SDValue, 2> Ops;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003143 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00003144 Ops.push_back(Chain);
3145 Ops.push_back(Flag);
3146
Justin Bogner8d83fb62016-05-13 21:12:53 +00003147 ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, VTs, Ops));
3148 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003149}
3150
Justin Bogner8d83fb62016-05-13 21:12:53 +00003151bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00003152 SDLoc DL(N);
3153 SDValue Chain = N->getOperand(0);
3154 SDValue Offset = N->getOperand(1);
3155 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
3156 MemSDNode *Mem = cast<MemSDNode>(N);
3157
3158 // How many elements do we have?
3159 unsigned NumElts = 1;
3160 switch (N->getOpcode()) {
3161 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003162 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003163 case NVPTXISD::StoreRetval:
3164 NumElts = 1;
3165 break;
3166 case NVPTXISD::StoreRetvalV2:
3167 NumElts = 2;
3168 break;
3169 case NVPTXISD::StoreRetvalV4:
3170 NumElts = 4;
3171 break;
3172 }
3173
3174 // Build vector of operands
3175 SmallVector<SDValue, 6> Ops;
3176 for (unsigned i = 0; i < NumElts; ++i)
3177 Ops.push_back(N->getOperand(i + 2));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003178 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00003179 Ops.push_back(Chain);
3180
3181 // Determine target opcode
3182 // If we have an i1, use an 8-bit store. The lowering code in
3183 // NVPTXISelLowering will have already emitted an upcast.
3184 unsigned Opcode = 0;
3185 switch (NumElts) {
3186 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003187 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003188 case 1:
3189 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3190 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003191 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003192 case MVT::i1:
3193 Opcode = NVPTX::StoreRetvalI8;
3194 break;
3195 case MVT::i8:
3196 Opcode = NVPTX::StoreRetvalI8;
3197 break;
3198 case MVT::i16:
3199 Opcode = NVPTX::StoreRetvalI16;
3200 break;
3201 case MVT::i32:
3202 Opcode = NVPTX::StoreRetvalI32;
3203 break;
3204 case MVT::i64:
3205 Opcode = NVPTX::StoreRetvalI64;
3206 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +00003207 case MVT::f16:
3208 Opcode = NVPTX::StoreRetvalF16;
3209 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00003210 case MVT::v2f16:
3211 Opcode = NVPTX::StoreRetvalF16x2;
3212 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003213 case MVT::f32:
3214 Opcode = NVPTX::StoreRetvalF32;
3215 break;
3216 case MVT::f64:
3217 Opcode = NVPTX::StoreRetvalF64;
3218 break;
3219 }
3220 break;
3221 case 2:
3222 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3223 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003224 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003225 case MVT::i1:
3226 Opcode = NVPTX::StoreRetvalV2I8;
3227 break;
3228 case MVT::i8:
3229 Opcode = NVPTX::StoreRetvalV2I8;
3230 break;
3231 case MVT::i16:
3232 Opcode = NVPTX::StoreRetvalV2I16;
3233 break;
3234 case MVT::i32:
3235 Opcode = NVPTX::StoreRetvalV2I32;
3236 break;
3237 case MVT::i64:
3238 Opcode = NVPTX::StoreRetvalV2I64;
3239 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00003240 case MVT::f16:
3241 Opcode = NVPTX::StoreRetvalV2F16;
3242 break;
3243 case MVT::v2f16:
3244 Opcode = NVPTX::StoreRetvalV2F16x2;
3245 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003246 case MVT::f32:
3247 Opcode = NVPTX::StoreRetvalV2F32;
3248 break;
3249 case MVT::f64:
3250 Opcode = NVPTX::StoreRetvalV2F64;
3251 break;
3252 }
3253 break;
3254 case 4:
3255 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3256 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003257 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003258 case MVT::i1:
3259 Opcode = NVPTX::StoreRetvalV4I8;
3260 break;
3261 case MVT::i8:
3262 Opcode = NVPTX::StoreRetvalV4I8;
3263 break;
3264 case MVT::i16:
3265 Opcode = NVPTX::StoreRetvalV4I16;
3266 break;
3267 case MVT::i32:
3268 Opcode = NVPTX::StoreRetvalV4I32;
3269 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00003270 case MVT::f16:
3271 Opcode = NVPTX::StoreRetvalV4F16;
3272 break;
3273 case MVT::v2f16:
3274 Opcode = NVPTX::StoreRetvalV4F16x2;
3275 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003276 case MVT::f32:
3277 Opcode = NVPTX::StoreRetvalV4F32;
3278 break;
3279 }
3280 break;
3281 }
3282
Artem Belevich620db1f2017-02-23 22:38:24 +00003283 SDNode *Ret = CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00003284 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3285 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3286 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3287
Justin Bogner8d83fb62016-05-13 21:12:53 +00003288 ReplaceNode(N, Ret);
3289 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003290}
3291
Justin Bogner8d83fb62016-05-13 21:12:53 +00003292bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00003293 SDLoc DL(N);
3294 SDValue Chain = N->getOperand(0);
3295 SDValue Param = N->getOperand(1);
3296 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
3297 SDValue Offset = N->getOperand(2);
3298 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
3299 MemSDNode *Mem = cast<MemSDNode>(N);
3300 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
3301
3302 // How many elements do we have?
3303 unsigned NumElts = 1;
3304 switch (N->getOpcode()) {
3305 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003306 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003307 case NVPTXISD::StoreParamU32:
3308 case NVPTXISD::StoreParamS32:
3309 case NVPTXISD::StoreParam:
3310 NumElts = 1;
3311 break;
3312 case NVPTXISD::StoreParamV2:
3313 NumElts = 2;
3314 break;
3315 case NVPTXISD::StoreParamV4:
3316 NumElts = 4;
3317 break;
3318 }
3319
3320 // Build vector of operands
3321 SmallVector<SDValue, 8> Ops;
3322 for (unsigned i = 0; i < NumElts; ++i)
3323 Ops.push_back(N->getOperand(i + 3));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003324 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
3325 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00003326 Ops.push_back(Chain);
3327 Ops.push_back(Flag);
3328
3329 // Determine target opcode
3330 // If we have an i1, use an 8-bit store. The lowering code in
3331 // NVPTXISelLowering will have already emitted an upcast.
3332 unsigned Opcode = 0;
3333 switch (N->getOpcode()) {
3334 default:
3335 switch (NumElts) {
3336 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003337 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003338 case 1:
3339 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3340 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003341 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003342 case MVT::i1:
3343 Opcode = NVPTX::StoreParamI8;
3344 break;
3345 case MVT::i8:
3346 Opcode = NVPTX::StoreParamI8;
3347 break;
3348 case MVT::i16:
3349 Opcode = NVPTX::StoreParamI16;
3350 break;
3351 case MVT::i32:
3352 Opcode = NVPTX::StoreParamI32;
3353 break;
3354 case MVT::i64:
3355 Opcode = NVPTX::StoreParamI64;
3356 break;
Artem Belevich64dc9be2017-01-13 20:56:17 +00003357 case MVT::f16:
3358 Opcode = NVPTX::StoreParamF16;
3359 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00003360 case MVT::v2f16:
3361 Opcode = NVPTX::StoreParamF16x2;
3362 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003363 case MVT::f32:
3364 Opcode = NVPTX::StoreParamF32;
3365 break;
3366 case MVT::f64:
3367 Opcode = NVPTX::StoreParamF64;
3368 break;
3369 }
3370 break;
3371 case 2:
3372 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3373 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003374 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003375 case MVT::i1:
3376 Opcode = NVPTX::StoreParamV2I8;
3377 break;
3378 case MVT::i8:
3379 Opcode = NVPTX::StoreParamV2I8;
3380 break;
3381 case MVT::i16:
3382 Opcode = NVPTX::StoreParamV2I16;
3383 break;
3384 case MVT::i32:
3385 Opcode = NVPTX::StoreParamV2I32;
3386 break;
3387 case MVT::i64:
3388 Opcode = NVPTX::StoreParamV2I64;
3389 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00003390 case MVT::f16:
3391 Opcode = NVPTX::StoreParamV2F16;
3392 break;
3393 case MVT::v2f16:
3394 Opcode = NVPTX::StoreParamV2F16x2;
3395 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003396 case MVT::f32:
3397 Opcode = NVPTX::StoreParamV2F32;
3398 break;
3399 case MVT::f64:
3400 Opcode = NVPTX::StoreParamV2F64;
3401 break;
3402 }
3403 break;
3404 case 4:
3405 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3406 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00003407 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003408 case MVT::i1:
3409 Opcode = NVPTX::StoreParamV4I8;
3410 break;
3411 case MVT::i8:
3412 Opcode = NVPTX::StoreParamV4I8;
3413 break;
3414 case MVT::i16:
3415 Opcode = NVPTX::StoreParamV4I16;
3416 break;
3417 case MVT::i32:
3418 Opcode = NVPTX::StoreParamV4I32;
3419 break;
Artem Belevich620db1f2017-02-23 22:38:24 +00003420 case MVT::f16:
3421 Opcode = NVPTX::StoreParamV4F16;
3422 break;
3423 case MVT::v2f16:
3424 Opcode = NVPTX::StoreParamV4F16x2;
3425 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003426 case MVT::f32:
3427 Opcode = NVPTX::StoreParamV4F32;
3428 break;
3429 }
3430 break;
3431 }
3432 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003433 // Special case: if we have a sign-extend/zero-extend node, insert the
3434 // conversion instruction first, and use that as the value operand to
3435 // the selected StoreParam node.
3436 case NVPTXISD::StoreParamU32: {
3437 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003438 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003439 MVT::i32);
3440 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
3441 MVT::i32, Ops[0], CvtNone);
3442 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00003443 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003444 }
3445 case NVPTXISD::StoreParamS32: {
3446 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003447 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003448 MVT::i32);
3449 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3450 MVT::i32, Ops[0], CvtNone);
3451 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00003452 break;
3453 }
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003454 }
Justin Holewinskif8f70912013-06-28 17:57:59 +00003455
Justin Holewinskidff28d22013-07-01 12:59:01 +00003456 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
Justin Holewinskif8f70912013-06-28 17:57:59 +00003457 SDNode *Ret =
Justin Holewinskidff28d22013-07-01 12:59:01 +00003458 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00003459 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3460 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3461 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3462
Justin Bogner8d83fb62016-05-13 21:12:53 +00003463 ReplaceNode(N, Ret);
3464 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003465}
3466
Justin Bogner8d83fb62016-05-13 21:12:53 +00003467bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00003468 SDValue Chain = N->getOperand(0);
Justin Holewinski30d56a72014-04-09 15:39:15 +00003469 unsigned Opc = 0;
3470 SmallVector<SDValue, 8> Ops;
3471
3472 switch (N->getOpcode()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00003473 default: return false;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003474 case NVPTXISD::Tex1DFloatS32:
3475 Opc = NVPTX::TEX_1D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003476 break;
3477 case NVPTXISD::Tex1DFloatFloat:
3478 Opc = NVPTX::TEX_1D_F32_F32;
3479 break;
3480 case NVPTXISD::Tex1DFloatFloatLevel:
3481 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3482 break;
3483 case NVPTXISD::Tex1DFloatFloatGrad:
3484 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3485 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003486 case NVPTXISD::Tex1DS32S32:
3487 Opc = NVPTX::TEX_1D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003488 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003489 case NVPTXISD::Tex1DS32Float:
3490 Opc = NVPTX::TEX_1D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003491 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003492 case NVPTXISD::Tex1DS32FloatLevel:
3493 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003494 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003495 case NVPTXISD::Tex1DS32FloatGrad:
3496 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003497 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003498 case NVPTXISD::Tex1DU32S32:
3499 Opc = NVPTX::TEX_1D_U32_S32;
3500 break;
3501 case NVPTXISD::Tex1DU32Float:
3502 Opc = NVPTX::TEX_1D_U32_F32;
3503 break;
3504 case NVPTXISD::Tex1DU32FloatLevel:
3505 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3506 break;
3507 case NVPTXISD::Tex1DU32FloatGrad:
3508 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3509 break;
3510 case NVPTXISD::Tex1DArrayFloatS32:
3511 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003512 break;
3513 case NVPTXISD::Tex1DArrayFloatFloat:
3514 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3515 break;
3516 case NVPTXISD::Tex1DArrayFloatFloatLevel:
3517 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3518 break;
3519 case NVPTXISD::Tex1DArrayFloatFloatGrad:
3520 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3521 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003522 case NVPTXISD::Tex1DArrayS32S32:
3523 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003524 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003525 case NVPTXISD::Tex1DArrayS32Float:
3526 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003527 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003528 case NVPTXISD::Tex1DArrayS32FloatLevel:
3529 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003530 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003531 case NVPTXISD::Tex1DArrayS32FloatGrad:
3532 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003533 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003534 case NVPTXISD::Tex1DArrayU32S32:
3535 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3536 break;
3537 case NVPTXISD::Tex1DArrayU32Float:
3538 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3539 break;
3540 case NVPTXISD::Tex1DArrayU32FloatLevel:
3541 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3542 break;
3543 case NVPTXISD::Tex1DArrayU32FloatGrad:
3544 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3545 break;
3546 case NVPTXISD::Tex2DFloatS32:
3547 Opc = NVPTX::TEX_2D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003548 break;
3549 case NVPTXISD::Tex2DFloatFloat:
3550 Opc = NVPTX::TEX_2D_F32_F32;
3551 break;
3552 case NVPTXISD::Tex2DFloatFloatLevel:
3553 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3554 break;
3555 case NVPTXISD::Tex2DFloatFloatGrad:
3556 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3557 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003558 case NVPTXISD::Tex2DS32S32:
3559 Opc = NVPTX::TEX_2D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003560 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003561 case NVPTXISD::Tex2DS32Float:
3562 Opc = NVPTX::TEX_2D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003563 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003564 case NVPTXISD::Tex2DS32FloatLevel:
3565 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003566 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003567 case NVPTXISD::Tex2DS32FloatGrad:
3568 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003569 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003570 case NVPTXISD::Tex2DU32S32:
3571 Opc = NVPTX::TEX_2D_U32_S32;
3572 break;
3573 case NVPTXISD::Tex2DU32Float:
3574 Opc = NVPTX::TEX_2D_U32_F32;
3575 break;
3576 case NVPTXISD::Tex2DU32FloatLevel:
3577 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3578 break;
3579 case NVPTXISD::Tex2DU32FloatGrad:
3580 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3581 break;
3582 case NVPTXISD::Tex2DArrayFloatS32:
3583 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003584 break;
3585 case NVPTXISD::Tex2DArrayFloatFloat:
3586 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3587 break;
3588 case NVPTXISD::Tex2DArrayFloatFloatLevel:
3589 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3590 break;
3591 case NVPTXISD::Tex2DArrayFloatFloatGrad:
3592 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3593 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003594 case NVPTXISD::Tex2DArrayS32S32:
3595 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003596 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003597 case NVPTXISD::Tex2DArrayS32Float:
3598 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003599 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003600 case NVPTXISD::Tex2DArrayS32FloatLevel:
3601 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003602 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003603 case NVPTXISD::Tex2DArrayS32FloatGrad:
3604 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003605 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003606 case NVPTXISD::Tex2DArrayU32S32:
3607 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3608 break;
3609 case NVPTXISD::Tex2DArrayU32Float:
3610 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3611 break;
3612 case NVPTXISD::Tex2DArrayU32FloatLevel:
3613 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3614 break;
3615 case NVPTXISD::Tex2DArrayU32FloatGrad:
3616 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3617 break;
3618 case NVPTXISD::Tex3DFloatS32:
3619 Opc = NVPTX::TEX_3D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003620 break;
3621 case NVPTXISD::Tex3DFloatFloat:
3622 Opc = NVPTX::TEX_3D_F32_F32;
3623 break;
3624 case NVPTXISD::Tex3DFloatFloatLevel:
3625 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3626 break;
3627 case NVPTXISD::Tex3DFloatFloatGrad:
3628 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3629 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003630 case NVPTXISD::Tex3DS32S32:
3631 Opc = NVPTX::TEX_3D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003632 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003633 case NVPTXISD::Tex3DS32Float:
3634 Opc = NVPTX::TEX_3D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003635 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003636 case NVPTXISD::Tex3DS32FloatLevel:
3637 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003638 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003639 case NVPTXISD::Tex3DS32FloatGrad:
3640 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3641 break;
3642 case NVPTXISD::Tex3DU32S32:
3643 Opc = NVPTX::TEX_3D_U32_S32;
3644 break;
3645 case NVPTXISD::Tex3DU32Float:
3646 Opc = NVPTX::TEX_3D_U32_F32;
3647 break;
3648 case NVPTXISD::Tex3DU32FloatLevel:
3649 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3650 break;
3651 case NVPTXISD::Tex3DU32FloatGrad:
3652 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3653 break;
3654 case NVPTXISD::TexCubeFloatFloat:
3655 Opc = NVPTX::TEX_CUBE_F32_F32;
3656 break;
3657 case NVPTXISD::TexCubeFloatFloatLevel:
3658 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3659 break;
3660 case NVPTXISD::TexCubeS32Float:
3661 Opc = NVPTX::TEX_CUBE_S32_F32;
3662 break;
3663 case NVPTXISD::TexCubeS32FloatLevel:
3664 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3665 break;
3666 case NVPTXISD::TexCubeU32Float:
3667 Opc = NVPTX::TEX_CUBE_U32_F32;
3668 break;
3669 case NVPTXISD::TexCubeU32FloatLevel:
3670 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3671 break;
3672 case NVPTXISD::TexCubeArrayFloatFloat:
3673 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3674 break;
3675 case NVPTXISD::TexCubeArrayFloatFloatLevel:
3676 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3677 break;
3678 case NVPTXISD::TexCubeArrayS32Float:
3679 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3680 break;
3681 case NVPTXISD::TexCubeArrayS32FloatLevel:
3682 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3683 break;
3684 case NVPTXISD::TexCubeArrayU32Float:
3685 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3686 break;
3687 case NVPTXISD::TexCubeArrayU32FloatLevel:
3688 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3689 break;
3690 case NVPTXISD::Tld4R2DFloatFloat:
3691 Opc = NVPTX::TLD4_R_2D_F32_F32;
3692 break;
3693 case NVPTXISD::Tld4G2DFloatFloat:
3694 Opc = NVPTX::TLD4_G_2D_F32_F32;
3695 break;
3696 case NVPTXISD::Tld4B2DFloatFloat:
3697 Opc = NVPTX::TLD4_B_2D_F32_F32;
3698 break;
3699 case NVPTXISD::Tld4A2DFloatFloat:
3700 Opc = NVPTX::TLD4_A_2D_F32_F32;
3701 break;
3702 case NVPTXISD::Tld4R2DS64Float:
3703 Opc = NVPTX::TLD4_R_2D_S32_F32;
3704 break;
3705 case NVPTXISD::Tld4G2DS64Float:
3706 Opc = NVPTX::TLD4_G_2D_S32_F32;
3707 break;
3708 case NVPTXISD::Tld4B2DS64Float:
3709 Opc = NVPTX::TLD4_B_2D_S32_F32;
3710 break;
3711 case NVPTXISD::Tld4A2DS64Float:
3712 Opc = NVPTX::TLD4_A_2D_S32_F32;
3713 break;
3714 case NVPTXISD::Tld4R2DU64Float:
3715 Opc = NVPTX::TLD4_R_2D_U32_F32;
3716 break;
3717 case NVPTXISD::Tld4G2DU64Float:
3718 Opc = NVPTX::TLD4_G_2D_U32_F32;
3719 break;
3720 case NVPTXISD::Tld4B2DU64Float:
3721 Opc = NVPTX::TLD4_B_2D_U32_F32;
3722 break;
3723 case NVPTXISD::Tld4A2DU64Float:
3724 Opc = NVPTX::TLD4_A_2D_U32_F32;
3725 break;
3726 case NVPTXISD::TexUnified1DFloatS32:
3727 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3728 break;
3729 case NVPTXISD::TexUnified1DFloatFloat:
3730 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3731 break;
3732 case NVPTXISD::TexUnified1DFloatFloatLevel:
3733 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3734 break;
3735 case NVPTXISD::TexUnified1DFloatFloatGrad:
3736 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3737 break;
3738 case NVPTXISD::TexUnified1DS32S32:
3739 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3740 break;
3741 case NVPTXISD::TexUnified1DS32Float:
3742 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3743 break;
3744 case NVPTXISD::TexUnified1DS32FloatLevel:
3745 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3746 break;
3747 case NVPTXISD::TexUnified1DS32FloatGrad:
3748 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3749 break;
3750 case NVPTXISD::TexUnified1DU32S32:
3751 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3752 break;
3753 case NVPTXISD::TexUnified1DU32Float:
3754 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3755 break;
3756 case NVPTXISD::TexUnified1DU32FloatLevel:
3757 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3758 break;
3759 case NVPTXISD::TexUnified1DU32FloatGrad:
3760 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3761 break;
3762 case NVPTXISD::TexUnified1DArrayFloatS32:
3763 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3764 break;
3765 case NVPTXISD::TexUnified1DArrayFloatFloat:
3766 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3767 break;
3768 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3769 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3770 break;
3771 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3772 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3773 break;
3774 case NVPTXISD::TexUnified1DArrayS32S32:
3775 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3776 break;
3777 case NVPTXISD::TexUnified1DArrayS32Float:
3778 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3779 break;
3780 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3781 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3782 break;
3783 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3784 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3785 break;
3786 case NVPTXISD::TexUnified1DArrayU32S32:
3787 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3788 break;
3789 case NVPTXISD::TexUnified1DArrayU32Float:
3790 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3791 break;
3792 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3793 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3794 break;
3795 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3796 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3797 break;
3798 case NVPTXISD::TexUnified2DFloatS32:
3799 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3800 break;
3801 case NVPTXISD::TexUnified2DFloatFloat:
3802 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3803 break;
3804 case NVPTXISD::TexUnified2DFloatFloatLevel:
3805 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3806 break;
3807 case NVPTXISD::TexUnified2DFloatFloatGrad:
3808 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3809 break;
3810 case NVPTXISD::TexUnified2DS32S32:
3811 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3812 break;
3813 case NVPTXISD::TexUnified2DS32Float:
3814 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3815 break;
3816 case NVPTXISD::TexUnified2DS32FloatLevel:
3817 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3818 break;
3819 case NVPTXISD::TexUnified2DS32FloatGrad:
3820 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3821 break;
3822 case NVPTXISD::TexUnified2DU32S32:
3823 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3824 break;
3825 case NVPTXISD::TexUnified2DU32Float:
3826 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3827 break;
3828 case NVPTXISD::TexUnified2DU32FloatLevel:
3829 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3830 break;
3831 case NVPTXISD::TexUnified2DU32FloatGrad:
3832 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3833 break;
3834 case NVPTXISD::TexUnified2DArrayFloatS32:
3835 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3836 break;
3837 case NVPTXISD::TexUnified2DArrayFloatFloat:
3838 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3839 break;
3840 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3841 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3842 break;
3843 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3844 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3845 break;
3846 case NVPTXISD::TexUnified2DArrayS32S32:
3847 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3848 break;
3849 case NVPTXISD::TexUnified2DArrayS32Float:
3850 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3851 break;
3852 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3853 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3854 break;
3855 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3856 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3857 break;
3858 case NVPTXISD::TexUnified2DArrayU32S32:
3859 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3860 break;
3861 case NVPTXISD::TexUnified2DArrayU32Float:
3862 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3863 break;
3864 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3865 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3866 break;
3867 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3868 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3869 break;
3870 case NVPTXISD::TexUnified3DFloatS32:
3871 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3872 break;
3873 case NVPTXISD::TexUnified3DFloatFloat:
3874 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3875 break;
3876 case NVPTXISD::TexUnified3DFloatFloatLevel:
3877 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3878 break;
3879 case NVPTXISD::TexUnified3DFloatFloatGrad:
3880 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3881 break;
3882 case NVPTXISD::TexUnified3DS32S32:
3883 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3884 break;
3885 case NVPTXISD::TexUnified3DS32Float:
3886 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3887 break;
3888 case NVPTXISD::TexUnified3DS32FloatLevel:
3889 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3890 break;
3891 case NVPTXISD::TexUnified3DS32FloatGrad:
3892 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3893 break;
3894 case NVPTXISD::TexUnified3DU32S32:
3895 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3896 break;
3897 case NVPTXISD::TexUnified3DU32Float:
3898 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3899 break;
3900 case NVPTXISD::TexUnified3DU32FloatLevel:
3901 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3902 break;
3903 case NVPTXISD::TexUnified3DU32FloatGrad:
3904 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3905 break;
3906 case NVPTXISD::TexUnifiedCubeFloatFloat:
3907 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3908 break;
3909 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3910 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3911 break;
3912 case NVPTXISD::TexUnifiedCubeS32Float:
3913 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3914 break;
3915 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3916 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3917 break;
3918 case NVPTXISD::TexUnifiedCubeU32Float:
3919 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3920 break;
3921 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3922 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3923 break;
3924 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3925 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3926 break;
3927 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3928 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3929 break;
3930 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3931 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3932 break;
3933 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3934 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3935 break;
3936 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3937 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3938 break;
3939 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3940 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3941 break;
3942 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3943 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3944 break;
3945 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3946 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3947 break;
3948 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3949 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3950 break;
3951 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3952 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3953 break;
3954 case NVPTXISD::Tld4UnifiedR2DS64Float:
3955 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3956 break;
3957 case NVPTXISD::Tld4UnifiedG2DS64Float:
3958 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3959 break;
3960 case NVPTXISD::Tld4UnifiedB2DS64Float:
3961 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3962 break;
3963 case NVPTXISD::Tld4UnifiedA2DS64Float:
3964 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3965 break;
3966 case NVPTXISD::Tld4UnifiedR2DU64Float:
3967 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3968 break;
3969 case NVPTXISD::Tld4UnifiedG2DU64Float:
3970 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3971 break;
3972 case NVPTXISD::Tld4UnifiedB2DU64Float:
3973 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3974 break;
3975 case NVPTXISD::Tld4UnifiedA2DU64Float:
3976 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003977 break;
3978 }
3979
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003980 // Copy over operands
3981 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00003982 Ops.push_back(N->getOperand(i));
3983 }
3984
3985 Ops.push_back(Chain);
Justin Bogner8d83fb62016-05-13 21:12:53 +00003986 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3987 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003988}
3989
Justin Bogner8d83fb62016-05-13 21:12:53 +00003990bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00003991 SDValue Chain = N->getOperand(0);
3992 SDValue TexHandle = N->getOperand(1);
Justin Holewinski30d56a72014-04-09 15:39:15 +00003993 unsigned Opc = 0;
3994 SmallVector<SDValue, 8> Ops;
3995 switch (N->getOpcode()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00003996 default: return false;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003997 case NVPTXISD::Suld1DI8Clamp:
3998 Opc = NVPTX::SULD_1D_I8_CLAMP;
3999 Ops.push_back(TexHandle);
4000 Ops.push_back(N->getOperand(2));
4001 Ops.push_back(Chain);
4002 break;
4003 case NVPTXISD::Suld1DI16Clamp:
4004 Opc = NVPTX::SULD_1D_I16_CLAMP;
4005 Ops.push_back(TexHandle);
4006 Ops.push_back(N->getOperand(2));
4007 Ops.push_back(Chain);
4008 break;
4009 case NVPTXISD::Suld1DI32Clamp:
4010 Opc = NVPTX::SULD_1D_I32_CLAMP;
4011 Ops.push_back(TexHandle);
4012 Ops.push_back(N->getOperand(2));
4013 Ops.push_back(Chain);
4014 break;
4015 case NVPTXISD::Suld1DI64Clamp:
4016 Opc = NVPTX::SULD_1D_I64_CLAMP;
4017 Ops.push_back(TexHandle);
4018 Ops.push_back(N->getOperand(2));
4019 Ops.push_back(Chain);
4020 break;
4021 case NVPTXISD::Suld1DV2I8Clamp:
4022 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
4023 Ops.push_back(TexHandle);
4024 Ops.push_back(N->getOperand(2));
4025 Ops.push_back(Chain);
4026 break;
4027 case NVPTXISD::Suld1DV2I16Clamp:
4028 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
4029 Ops.push_back(TexHandle);
4030 Ops.push_back(N->getOperand(2));
4031 Ops.push_back(Chain);
4032 break;
4033 case NVPTXISD::Suld1DV2I32Clamp:
4034 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
4035 Ops.push_back(TexHandle);
4036 Ops.push_back(N->getOperand(2));
4037 Ops.push_back(Chain);
4038 break;
4039 case NVPTXISD::Suld1DV2I64Clamp:
4040 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
4041 Ops.push_back(TexHandle);
4042 Ops.push_back(N->getOperand(2));
4043 Ops.push_back(Chain);
4044 break;
4045 case NVPTXISD::Suld1DV4I8Clamp:
4046 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
4047 Ops.push_back(TexHandle);
4048 Ops.push_back(N->getOperand(2));
4049 Ops.push_back(Chain);
4050 break;
4051 case NVPTXISD::Suld1DV4I16Clamp:
4052 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
4053 Ops.push_back(TexHandle);
4054 Ops.push_back(N->getOperand(2));
4055 Ops.push_back(Chain);
4056 break;
4057 case NVPTXISD::Suld1DV4I32Clamp:
4058 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
4059 Ops.push_back(TexHandle);
4060 Ops.push_back(N->getOperand(2));
4061 Ops.push_back(Chain);
4062 break;
4063 case NVPTXISD::Suld1DArrayI8Clamp:
4064 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
4065 Ops.push_back(TexHandle);
4066 Ops.push_back(N->getOperand(2));
4067 Ops.push_back(N->getOperand(3));
4068 Ops.push_back(Chain);
4069 break;
4070 case NVPTXISD::Suld1DArrayI16Clamp:
4071 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
4072 Ops.push_back(TexHandle);
4073 Ops.push_back(N->getOperand(2));
4074 Ops.push_back(N->getOperand(3));
4075 Ops.push_back(Chain);
4076 break;
4077 case NVPTXISD::Suld1DArrayI32Clamp:
4078 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
4079 Ops.push_back(TexHandle);
4080 Ops.push_back(N->getOperand(2));
4081 Ops.push_back(N->getOperand(3));
4082 Ops.push_back(Chain);
4083 break;
4084 case NVPTXISD::Suld1DArrayI64Clamp:
4085 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
4086 Ops.push_back(TexHandle);
4087 Ops.push_back(N->getOperand(2));
4088 Ops.push_back(N->getOperand(3));
4089 Ops.push_back(Chain);
4090 break;
4091 case NVPTXISD::Suld1DArrayV2I8Clamp:
4092 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
4093 Ops.push_back(TexHandle);
4094 Ops.push_back(N->getOperand(2));
4095 Ops.push_back(N->getOperand(3));
4096 Ops.push_back(Chain);
4097 break;
4098 case NVPTXISD::Suld1DArrayV2I16Clamp:
4099 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
4100 Ops.push_back(TexHandle);
4101 Ops.push_back(N->getOperand(2));
4102 Ops.push_back(N->getOperand(3));
4103 Ops.push_back(Chain);
4104 break;
4105 case NVPTXISD::Suld1DArrayV2I32Clamp:
4106 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
4107 Ops.push_back(TexHandle);
4108 Ops.push_back(N->getOperand(2));
4109 Ops.push_back(N->getOperand(3));
4110 Ops.push_back(Chain);
4111 break;
4112 case NVPTXISD::Suld1DArrayV2I64Clamp:
4113 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
4114 Ops.push_back(TexHandle);
4115 Ops.push_back(N->getOperand(2));
4116 Ops.push_back(N->getOperand(3));
4117 Ops.push_back(Chain);
4118 break;
4119 case NVPTXISD::Suld1DArrayV4I8Clamp:
4120 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
4121 Ops.push_back(TexHandle);
4122 Ops.push_back(N->getOperand(2));
4123 Ops.push_back(N->getOperand(3));
4124 Ops.push_back(Chain);
4125 break;
4126 case NVPTXISD::Suld1DArrayV4I16Clamp:
4127 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
4128 Ops.push_back(TexHandle);
4129 Ops.push_back(N->getOperand(2));
4130 Ops.push_back(N->getOperand(3));
4131 Ops.push_back(Chain);
4132 break;
4133 case NVPTXISD::Suld1DArrayV4I32Clamp:
4134 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
4135 Ops.push_back(TexHandle);
4136 Ops.push_back(N->getOperand(2));
4137 Ops.push_back(N->getOperand(3));
4138 Ops.push_back(Chain);
4139 break;
4140 case NVPTXISD::Suld2DI8Clamp:
4141 Opc = NVPTX::SULD_2D_I8_CLAMP;
4142 Ops.push_back(TexHandle);
4143 Ops.push_back(N->getOperand(2));
4144 Ops.push_back(N->getOperand(3));
4145 Ops.push_back(Chain);
4146 break;
4147 case NVPTXISD::Suld2DI16Clamp:
4148 Opc = NVPTX::SULD_2D_I16_CLAMP;
4149 Ops.push_back(TexHandle);
4150 Ops.push_back(N->getOperand(2));
4151 Ops.push_back(N->getOperand(3));
4152 Ops.push_back(Chain);
4153 break;
4154 case NVPTXISD::Suld2DI32Clamp:
4155 Opc = NVPTX::SULD_2D_I32_CLAMP;
4156 Ops.push_back(TexHandle);
4157 Ops.push_back(N->getOperand(2));
4158 Ops.push_back(N->getOperand(3));
4159 Ops.push_back(Chain);
4160 break;
4161 case NVPTXISD::Suld2DI64Clamp:
4162 Opc = NVPTX::SULD_2D_I64_CLAMP;
4163 Ops.push_back(TexHandle);
4164 Ops.push_back(N->getOperand(2));
4165 Ops.push_back(N->getOperand(3));
4166 Ops.push_back(Chain);
4167 break;
4168 case NVPTXISD::Suld2DV2I8Clamp:
4169 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
4170 Ops.push_back(TexHandle);
4171 Ops.push_back(N->getOperand(2));
4172 Ops.push_back(N->getOperand(3));
4173 Ops.push_back(Chain);
4174 break;
4175 case NVPTXISD::Suld2DV2I16Clamp:
4176 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
4177 Ops.push_back(TexHandle);
4178 Ops.push_back(N->getOperand(2));
4179 Ops.push_back(N->getOperand(3));
4180 Ops.push_back(Chain);
4181 break;
4182 case NVPTXISD::Suld2DV2I32Clamp:
4183 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
4184 Ops.push_back(TexHandle);
4185 Ops.push_back(N->getOperand(2));
4186 Ops.push_back(N->getOperand(3));
4187 Ops.push_back(Chain);
4188 break;
4189 case NVPTXISD::Suld2DV2I64Clamp:
4190 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
4191 Ops.push_back(TexHandle);
4192 Ops.push_back(N->getOperand(2));
4193 Ops.push_back(N->getOperand(3));
4194 Ops.push_back(Chain);
4195 break;
4196 case NVPTXISD::Suld2DV4I8Clamp:
4197 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
4198 Ops.push_back(TexHandle);
4199 Ops.push_back(N->getOperand(2));
4200 Ops.push_back(N->getOperand(3));
4201 Ops.push_back(Chain);
4202 break;
4203 case NVPTXISD::Suld2DV4I16Clamp:
4204 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
4205 Ops.push_back(TexHandle);
4206 Ops.push_back(N->getOperand(2));
4207 Ops.push_back(N->getOperand(3));
4208 Ops.push_back(Chain);
4209 break;
4210 case NVPTXISD::Suld2DV4I32Clamp:
4211 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
4212 Ops.push_back(TexHandle);
4213 Ops.push_back(N->getOperand(2));
4214 Ops.push_back(N->getOperand(3));
4215 Ops.push_back(Chain);
4216 break;
4217 case NVPTXISD::Suld2DArrayI8Clamp:
4218 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
4219 Ops.push_back(TexHandle);
4220 Ops.push_back(N->getOperand(2));
4221 Ops.push_back(N->getOperand(3));
4222 Ops.push_back(N->getOperand(4));
4223 Ops.push_back(Chain);
4224 break;
4225 case NVPTXISD::Suld2DArrayI16Clamp:
4226 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
4227 Ops.push_back(TexHandle);
4228 Ops.push_back(N->getOperand(2));
4229 Ops.push_back(N->getOperand(3));
4230 Ops.push_back(N->getOperand(4));
4231 Ops.push_back(Chain);
4232 break;
4233 case NVPTXISD::Suld2DArrayI32Clamp:
4234 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
4235 Ops.push_back(TexHandle);
4236 Ops.push_back(N->getOperand(2));
4237 Ops.push_back(N->getOperand(3));
4238 Ops.push_back(N->getOperand(4));
4239 Ops.push_back(Chain);
4240 break;
4241 case NVPTXISD::Suld2DArrayI64Clamp:
4242 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
4243 Ops.push_back(TexHandle);
4244 Ops.push_back(N->getOperand(2));
4245 Ops.push_back(N->getOperand(3));
4246 Ops.push_back(N->getOperand(4));
4247 Ops.push_back(Chain);
4248 break;
4249 case NVPTXISD::Suld2DArrayV2I8Clamp:
4250 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
4251 Ops.push_back(TexHandle);
4252 Ops.push_back(N->getOperand(2));
4253 Ops.push_back(N->getOperand(3));
4254 Ops.push_back(N->getOperand(4));
4255 Ops.push_back(Chain);
4256 break;
4257 case NVPTXISD::Suld2DArrayV2I16Clamp:
4258 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
4259 Ops.push_back(TexHandle);
4260 Ops.push_back(N->getOperand(2));
4261 Ops.push_back(N->getOperand(3));
4262 Ops.push_back(N->getOperand(4));
4263 Ops.push_back(Chain);
4264 break;
4265 case NVPTXISD::Suld2DArrayV2I32Clamp:
4266 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
4267 Ops.push_back(TexHandle);
4268 Ops.push_back(N->getOperand(2));
4269 Ops.push_back(N->getOperand(3));
4270 Ops.push_back(N->getOperand(4));
4271 Ops.push_back(Chain);
4272 break;
4273 case NVPTXISD::Suld2DArrayV2I64Clamp:
4274 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
4275 Ops.push_back(TexHandle);
4276 Ops.push_back(N->getOperand(2));
4277 Ops.push_back(N->getOperand(3));
4278 Ops.push_back(N->getOperand(4));
4279 Ops.push_back(Chain);
4280 break;
4281 case NVPTXISD::Suld2DArrayV4I8Clamp:
4282 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
4283 Ops.push_back(TexHandle);
4284 Ops.push_back(N->getOperand(2));
4285 Ops.push_back(N->getOperand(3));
4286 Ops.push_back(N->getOperand(4));
4287 Ops.push_back(Chain);
4288 break;
4289 case NVPTXISD::Suld2DArrayV4I16Clamp:
4290 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
4291 Ops.push_back(TexHandle);
4292 Ops.push_back(N->getOperand(2));
4293 Ops.push_back(N->getOperand(3));
4294 Ops.push_back(N->getOperand(4));
4295 Ops.push_back(Chain);
4296 break;
4297 case NVPTXISD::Suld2DArrayV4I32Clamp:
4298 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
4299 Ops.push_back(TexHandle);
4300 Ops.push_back(N->getOperand(2));
4301 Ops.push_back(N->getOperand(3));
4302 Ops.push_back(N->getOperand(4));
4303 Ops.push_back(Chain);
4304 break;
4305 case NVPTXISD::Suld3DI8Clamp:
4306 Opc = NVPTX::SULD_3D_I8_CLAMP;
4307 Ops.push_back(TexHandle);
4308 Ops.push_back(N->getOperand(2));
4309 Ops.push_back(N->getOperand(3));
4310 Ops.push_back(N->getOperand(4));
4311 Ops.push_back(Chain);
4312 break;
4313 case NVPTXISD::Suld3DI16Clamp:
4314 Opc = NVPTX::SULD_3D_I16_CLAMP;
4315 Ops.push_back(TexHandle);
4316 Ops.push_back(N->getOperand(2));
4317 Ops.push_back(N->getOperand(3));
4318 Ops.push_back(N->getOperand(4));
4319 Ops.push_back(Chain);
4320 break;
4321 case NVPTXISD::Suld3DI32Clamp:
4322 Opc = NVPTX::SULD_3D_I32_CLAMP;
4323 Ops.push_back(TexHandle);
4324 Ops.push_back(N->getOperand(2));
4325 Ops.push_back(N->getOperand(3));
4326 Ops.push_back(N->getOperand(4));
4327 Ops.push_back(Chain);
4328 break;
4329 case NVPTXISD::Suld3DI64Clamp:
4330 Opc = NVPTX::SULD_3D_I64_CLAMP;
4331 Ops.push_back(TexHandle);
4332 Ops.push_back(N->getOperand(2));
4333 Ops.push_back(N->getOperand(3));
4334 Ops.push_back(N->getOperand(4));
4335 Ops.push_back(Chain);
4336 break;
4337 case NVPTXISD::Suld3DV2I8Clamp:
4338 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
4339 Ops.push_back(TexHandle);
4340 Ops.push_back(N->getOperand(2));
4341 Ops.push_back(N->getOperand(3));
4342 Ops.push_back(N->getOperand(4));
4343 Ops.push_back(Chain);
4344 break;
4345 case NVPTXISD::Suld3DV2I16Clamp:
4346 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
4347 Ops.push_back(TexHandle);
4348 Ops.push_back(N->getOperand(2));
4349 Ops.push_back(N->getOperand(3));
4350 Ops.push_back(N->getOperand(4));
4351 Ops.push_back(Chain);
4352 break;
4353 case NVPTXISD::Suld3DV2I32Clamp:
4354 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
4355 Ops.push_back(TexHandle);
4356 Ops.push_back(N->getOperand(2));
4357 Ops.push_back(N->getOperand(3));
4358 Ops.push_back(N->getOperand(4));
4359 Ops.push_back(Chain);
4360 break;
4361 case NVPTXISD::Suld3DV2I64Clamp:
4362 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
4363 Ops.push_back(TexHandle);
4364 Ops.push_back(N->getOperand(2));
4365 Ops.push_back(N->getOperand(3));
4366 Ops.push_back(N->getOperand(4));
4367 Ops.push_back(Chain);
4368 break;
4369 case NVPTXISD::Suld3DV4I8Clamp:
4370 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
4371 Ops.push_back(TexHandle);
4372 Ops.push_back(N->getOperand(2));
4373 Ops.push_back(N->getOperand(3));
4374 Ops.push_back(N->getOperand(4));
4375 Ops.push_back(Chain);
4376 break;
4377 case NVPTXISD::Suld3DV4I16Clamp:
4378 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
4379 Ops.push_back(TexHandle);
4380 Ops.push_back(N->getOperand(2));
4381 Ops.push_back(N->getOperand(3));
4382 Ops.push_back(N->getOperand(4));
4383 Ops.push_back(Chain);
4384 break;
4385 case NVPTXISD::Suld3DV4I32Clamp:
4386 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
4387 Ops.push_back(TexHandle);
4388 Ops.push_back(N->getOperand(2));
4389 Ops.push_back(N->getOperand(3));
4390 Ops.push_back(N->getOperand(4));
4391 Ops.push_back(Chain);
4392 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004393 case NVPTXISD::Suld1DI8Trap:
4394 Opc = NVPTX::SULD_1D_I8_TRAP;
4395 Ops.push_back(TexHandle);
4396 Ops.push_back(N->getOperand(2));
4397 Ops.push_back(Chain);
4398 break;
4399 case NVPTXISD::Suld1DI16Trap:
4400 Opc = NVPTX::SULD_1D_I16_TRAP;
4401 Ops.push_back(TexHandle);
4402 Ops.push_back(N->getOperand(2));
4403 Ops.push_back(Chain);
4404 break;
4405 case NVPTXISD::Suld1DI32Trap:
4406 Opc = NVPTX::SULD_1D_I32_TRAP;
4407 Ops.push_back(TexHandle);
4408 Ops.push_back(N->getOperand(2));
4409 Ops.push_back(Chain);
4410 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004411 case NVPTXISD::Suld1DI64Trap:
4412 Opc = NVPTX::SULD_1D_I64_TRAP;
4413 Ops.push_back(TexHandle);
4414 Ops.push_back(N->getOperand(2));
4415 Ops.push_back(Chain);
4416 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004417 case NVPTXISD::Suld1DV2I8Trap:
4418 Opc = NVPTX::SULD_1D_V2I8_TRAP;
4419 Ops.push_back(TexHandle);
4420 Ops.push_back(N->getOperand(2));
4421 Ops.push_back(Chain);
4422 break;
4423 case NVPTXISD::Suld1DV2I16Trap:
4424 Opc = NVPTX::SULD_1D_V2I16_TRAP;
4425 Ops.push_back(TexHandle);
4426 Ops.push_back(N->getOperand(2));
4427 Ops.push_back(Chain);
4428 break;
4429 case NVPTXISD::Suld1DV2I32Trap:
4430 Opc = NVPTX::SULD_1D_V2I32_TRAP;
4431 Ops.push_back(TexHandle);
4432 Ops.push_back(N->getOperand(2));
4433 Ops.push_back(Chain);
4434 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004435 case NVPTXISD::Suld1DV2I64Trap:
4436 Opc = NVPTX::SULD_1D_V2I64_TRAP;
4437 Ops.push_back(TexHandle);
4438 Ops.push_back(N->getOperand(2));
4439 Ops.push_back(Chain);
4440 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004441 case NVPTXISD::Suld1DV4I8Trap:
4442 Opc = NVPTX::SULD_1D_V4I8_TRAP;
4443 Ops.push_back(TexHandle);
4444 Ops.push_back(N->getOperand(2));
4445 Ops.push_back(Chain);
4446 break;
4447 case NVPTXISD::Suld1DV4I16Trap:
4448 Opc = NVPTX::SULD_1D_V4I16_TRAP;
4449 Ops.push_back(TexHandle);
4450 Ops.push_back(N->getOperand(2));
4451 Ops.push_back(Chain);
4452 break;
4453 case NVPTXISD::Suld1DV4I32Trap:
4454 Opc = NVPTX::SULD_1D_V4I32_TRAP;
4455 Ops.push_back(TexHandle);
4456 Ops.push_back(N->getOperand(2));
4457 Ops.push_back(Chain);
4458 break;
4459 case NVPTXISD::Suld1DArrayI8Trap:
4460 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4461 Ops.push_back(TexHandle);
4462 Ops.push_back(N->getOperand(2));
4463 Ops.push_back(N->getOperand(3));
4464 Ops.push_back(Chain);
4465 break;
4466 case NVPTXISD::Suld1DArrayI16Trap:
4467 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4468 Ops.push_back(TexHandle);
4469 Ops.push_back(N->getOperand(2));
4470 Ops.push_back(N->getOperand(3));
4471 Ops.push_back(Chain);
4472 break;
4473 case NVPTXISD::Suld1DArrayI32Trap:
4474 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4475 Ops.push_back(TexHandle);
4476 Ops.push_back(N->getOperand(2));
4477 Ops.push_back(N->getOperand(3));
4478 Ops.push_back(Chain);
4479 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004480 case NVPTXISD::Suld1DArrayI64Trap:
4481 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4482 Ops.push_back(TexHandle);
4483 Ops.push_back(N->getOperand(2));
4484 Ops.push_back(N->getOperand(3));
4485 Ops.push_back(Chain);
4486 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004487 case NVPTXISD::Suld1DArrayV2I8Trap:
4488 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4489 Ops.push_back(TexHandle);
4490 Ops.push_back(N->getOperand(2));
4491 Ops.push_back(N->getOperand(3));
4492 Ops.push_back(Chain);
4493 break;
4494 case NVPTXISD::Suld1DArrayV2I16Trap:
4495 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4496 Ops.push_back(TexHandle);
4497 Ops.push_back(N->getOperand(2));
4498 Ops.push_back(N->getOperand(3));
4499 Ops.push_back(Chain);
4500 break;
4501 case NVPTXISD::Suld1DArrayV2I32Trap:
4502 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4503 Ops.push_back(TexHandle);
4504 Ops.push_back(N->getOperand(2));
4505 Ops.push_back(N->getOperand(3));
4506 Ops.push_back(Chain);
4507 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004508 case NVPTXISD::Suld1DArrayV2I64Trap:
4509 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4510 Ops.push_back(TexHandle);
4511 Ops.push_back(N->getOperand(2));
4512 Ops.push_back(N->getOperand(3));
4513 Ops.push_back(Chain);
4514 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004515 case NVPTXISD::Suld1DArrayV4I8Trap:
4516 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4517 Ops.push_back(TexHandle);
4518 Ops.push_back(N->getOperand(2));
4519 Ops.push_back(N->getOperand(3));
4520 Ops.push_back(Chain);
4521 break;
4522 case NVPTXISD::Suld1DArrayV4I16Trap:
4523 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4524 Ops.push_back(TexHandle);
4525 Ops.push_back(N->getOperand(2));
4526 Ops.push_back(N->getOperand(3));
4527 Ops.push_back(Chain);
4528 break;
4529 case NVPTXISD::Suld1DArrayV4I32Trap:
4530 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4531 Ops.push_back(TexHandle);
4532 Ops.push_back(N->getOperand(2));
4533 Ops.push_back(N->getOperand(3));
4534 Ops.push_back(Chain);
4535 break;
4536 case NVPTXISD::Suld2DI8Trap:
4537 Opc = NVPTX::SULD_2D_I8_TRAP;
4538 Ops.push_back(TexHandle);
4539 Ops.push_back(N->getOperand(2));
4540 Ops.push_back(N->getOperand(3));
4541 Ops.push_back(Chain);
4542 break;
4543 case NVPTXISD::Suld2DI16Trap:
4544 Opc = NVPTX::SULD_2D_I16_TRAP;
4545 Ops.push_back(TexHandle);
4546 Ops.push_back(N->getOperand(2));
4547 Ops.push_back(N->getOperand(3));
4548 Ops.push_back(Chain);
4549 break;
4550 case NVPTXISD::Suld2DI32Trap:
4551 Opc = NVPTX::SULD_2D_I32_TRAP;
4552 Ops.push_back(TexHandle);
4553 Ops.push_back(N->getOperand(2));
4554 Ops.push_back(N->getOperand(3));
4555 Ops.push_back(Chain);
4556 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004557 case NVPTXISD::Suld2DI64Trap:
4558 Opc = NVPTX::SULD_2D_I64_TRAP;
4559 Ops.push_back(TexHandle);
4560 Ops.push_back(N->getOperand(2));
4561 Ops.push_back(N->getOperand(3));
4562 Ops.push_back(Chain);
4563 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004564 case NVPTXISD::Suld2DV2I8Trap:
4565 Opc = NVPTX::SULD_2D_V2I8_TRAP;
4566 Ops.push_back(TexHandle);
4567 Ops.push_back(N->getOperand(2));
4568 Ops.push_back(N->getOperand(3));
4569 Ops.push_back(Chain);
4570 break;
4571 case NVPTXISD::Suld2DV2I16Trap:
4572 Opc = NVPTX::SULD_2D_V2I16_TRAP;
4573 Ops.push_back(TexHandle);
4574 Ops.push_back(N->getOperand(2));
4575 Ops.push_back(N->getOperand(3));
4576 Ops.push_back(Chain);
4577 break;
4578 case NVPTXISD::Suld2DV2I32Trap:
4579 Opc = NVPTX::SULD_2D_V2I32_TRAP;
4580 Ops.push_back(TexHandle);
4581 Ops.push_back(N->getOperand(2));
4582 Ops.push_back(N->getOperand(3));
4583 Ops.push_back(Chain);
4584 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004585 case NVPTXISD::Suld2DV2I64Trap:
4586 Opc = NVPTX::SULD_2D_V2I64_TRAP;
4587 Ops.push_back(TexHandle);
4588 Ops.push_back(N->getOperand(2));
4589 Ops.push_back(N->getOperand(3));
4590 Ops.push_back(Chain);
4591 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004592 case NVPTXISD::Suld2DV4I8Trap:
4593 Opc = NVPTX::SULD_2D_V4I8_TRAP;
4594 Ops.push_back(TexHandle);
4595 Ops.push_back(N->getOperand(2));
4596 Ops.push_back(N->getOperand(3));
4597 Ops.push_back(Chain);
4598 break;
4599 case NVPTXISD::Suld2DV4I16Trap:
4600 Opc = NVPTX::SULD_2D_V4I16_TRAP;
4601 Ops.push_back(TexHandle);
4602 Ops.push_back(N->getOperand(2));
4603 Ops.push_back(N->getOperand(3));
4604 Ops.push_back(Chain);
4605 break;
4606 case NVPTXISD::Suld2DV4I32Trap:
4607 Opc = NVPTX::SULD_2D_V4I32_TRAP;
4608 Ops.push_back(TexHandle);
4609 Ops.push_back(N->getOperand(2));
4610 Ops.push_back(N->getOperand(3));
4611 Ops.push_back(Chain);
4612 break;
4613 case NVPTXISD::Suld2DArrayI8Trap:
4614 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4615 Ops.push_back(TexHandle);
4616 Ops.push_back(N->getOperand(2));
4617 Ops.push_back(N->getOperand(3));
4618 Ops.push_back(N->getOperand(4));
4619 Ops.push_back(Chain);
4620 break;
4621 case NVPTXISD::Suld2DArrayI16Trap:
4622 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4623 Ops.push_back(TexHandle);
4624 Ops.push_back(N->getOperand(2));
4625 Ops.push_back(N->getOperand(3));
4626 Ops.push_back(N->getOperand(4));
4627 Ops.push_back(Chain);
4628 break;
4629 case NVPTXISD::Suld2DArrayI32Trap:
4630 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4631 Ops.push_back(TexHandle);
4632 Ops.push_back(N->getOperand(2));
4633 Ops.push_back(N->getOperand(3));
4634 Ops.push_back(N->getOperand(4));
4635 Ops.push_back(Chain);
4636 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004637 case NVPTXISD::Suld2DArrayI64Trap:
4638 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4639 Ops.push_back(TexHandle);
4640 Ops.push_back(N->getOperand(2));
4641 Ops.push_back(N->getOperand(3));
4642 Ops.push_back(N->getOperand(4));
4643 Ops.push_back(Chain);
4644 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004645 case NVPTXISD::Suld2DArrayV2I8Trap:
4646 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4647 Ops.push_back(TexHandle);
4648 Ops.push_back(N->getOperand(2));
4649 Ops.push_back(N->getOperand(3));
4650 Ops.push_back(N->getOperand(4));
4651 Ops.push_back(Chain);
4652 break;
4653 case NVPTXISD::Suld2DArrayV2I16Trap:
4654 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4655 Ops.push_back(TexHandle);
4656 Ops.push_back(N->getOperand(2));
4657 Ops.push_back(N->getOperand(3));
4658 Ops.push_back(N->getOperand(4));
4659 Ops.push_back(Chain);
4660 break;
4661 case NVPTXISD::Suld2DArrayV2I32Trap:
4662 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4663 Ops.push_back(TexHandle);
4664 Ops.push_back(N->getOperand(2));
4665 Ops.push_back(N->getOperand(3));
4666 Ops.push_back(N->getOperand(4));
4667 Ops.push_back(Chain);
4668 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004669 case NVPTXISD::Suld2DArrayV2I64Trap:
4670 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4671 Ops.push_back(TexHandle);
4672 Ops.push_back(N->getOperand(2));
4673 Ops.push_back(N->getOperand(3));
4674 Ops.push_back(N->getOperand(4));
4675 Ops.push_back(Chain);
4676 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004677 case NVPTXISD::Suld2DArrayV4I8Trap:
4678 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4679 Ops.push_back(TexHandle);
4680 Ops.push_back(N->getOperand(2));
4681 Ops.push_back(N->getOperand(3));
4682 Ops.push_back(N->getOperand(4));
4683 Ops.push_back(Chain);
4684 break;
4685 case NVPTXISD::Suld2DArrayV4I16Trap:
4686 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4687 Ops.push_back(TexHandle);
4688 Ops.push_back(N->getOperand(2));
4689 Ops.push_back(N->getOperand(3));
4690 Ops.push_back(N->getOperand(4));
4691 Ops.push_back(Chain);
4692 break;
4693 case NVPTXISD::Suld2DArrayV4I32Trap:
4694 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4695 Ops.push_back(TexHandle);
4696 Ops.push_back(N->getOperand(2));
4697 Ops.push_back(N->getOperand(3));
4698 Ops.push_back(N->getOperand(4));
4699 Ops.push_back(Chain);
4700 break;
4701 case NVPTXISD::Suld3DI8Trap:
4702 Opc = NVPTX::SULD_3D_I8_TRAP;
4703 Ops.push_back(TexHandle);
4704 Ops.push_back(N->getOperand(2));
4705 Ops.push_back(N->getOperand(3));
4706 Ops.push_back(N->getOperand(4));
4707 Ops.push_back(Chain);
4708 break;
4709 case NVPTXISD::Suld3DI16Trap:
4710 Opc = NVPTX::SULD_3D_I16_TRAP;
4711 Ops.push_back(TexHandle);
4712 Ops.push_back(N->getOperand(2));
4713 Ops.push_back(N->getOperand(3));
4714 Ops.push_back(N->getOperand(4));
4715 Ops.push_back(Chain);
4716 break;
4717 case NVPTXISD::Suld3DI32Trap:
4718 Opc = NVPTX::SULD_3D_I32_TRAP;
4719 Ops.push_back(TexHandle);
4720 Ops.push_back(N->getOperand(2));
4721 Ops.push_back(N->getOperand(3));
4722 Ops.push_back(N->getOperand(4));
4723 Ops.push_back(Chain);
4724 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004725 case NVPTXISD::Suld3DI64Trap:
4726 Opc = NVPTX::SULD_3D_I64_TRAP;
4727 Ops.push_back(TexHandle);
4728 Ops.push_back(N->getOperand(2));
4729 Ops.push_back(N->getOperand(3));
4730 Ops.push_back(N->getOperand(4));
4731 Ops.push_back(Chain);
4732 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004733 case NVPTXISD::Suld3DV2I8Trap:
4734 Opc = NVPTX::SULD_3D_V2I8_TRAP;
4735 Ops.push_back(TexHandle);
4736 Ops.push_back(N->getOperand(2));
4737 Ops.push_back(N->getOperand(3));
4738 Ops.push_back(N->getOperand(4));
4739 Ops.push_back(Chain);
4740 break;
4741 case NVPTXISD::Suld3DV2I16Trap:
4742 Opc = NVPTX::SULD_3D_V2I16_TRAP;
4743 Ops.push_back(TexHandle);
4744 Ops.push_back(N->getOperand(2));
4745 Ops.push_back(N->getOperand(3));
4746 Ops.push_back(N->getOperand(4));
4747 Ops.push_back(Chain);
4748 break;
4749 case NVPTXISD::Suld3DV2I32Trap:
4750 Opc = NVPTX::SULD_3D_V2I32_TRAP;
4751 Ops.push_back(TexHandle);
4752 Ops.push_back(N->getOperand(2));
4753 Ops.push_back(N->getOperand(3));
4754 Ops.push_back(N->getOperand(4));
4755 Ops.push_back(Chain);
4756 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004757 case NVPTXISD::Suld3DV2I64Trap:
4758 Opc = NVPTX::SULD_3D_V2I64_TRAP;
4759 Ops.push_back(TexHandle);
4760 Ops.push_back(N->getOperand(2));
4761 Ops.push_back(N->getOperand(3));
4762 Ops.push_back(N->getOperand(4));
4763 Ops.push_back(Chain);
4764 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004765 case NVPTXISD::Suld3DV4I8Trap:
4766 Opc = NVPTX::SULD_3D_V4I8_TRAP;
4767 Ops.push_back(TexHandle);
4768 Ops.push_back(N->getOperand(2));
4769 Ops.push_back(N->getOperand(3));
4770 Ops.push_back(N->getOperand(4));
4771 Ops.push_back(Chain);
4772 break;
4773 case NVPTXISD::Suld3DV4I16Trap:
4774 Opc = NVPTX::SULD_3D_V4I16_TRAP;
4775 Ops.push_back(TexHandle);
4776 Ops.push_back(N->getOperand(2));
4777 Ops.push_back(N->getOperand(3));
4778 Ops.push_back(N->getOperand(4));
4779 Ops.push_back(Chain);
4780 break;
4781 case NVPTXISD::Suld3DV4I32Trap:
4782 Opc = NVPTX::SULD_3D_V4I32_TRAP;
4783 Ops.push_back(TexHandle);
4784 Ops.push_back(N->getOperand(2));
4785 Ops.push_back(N->getOperand(3));
4786 Ops.push_back(N->getOperand(4));
4787 Ops.push_back(Chain);
4788 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004789 case NVPTXISD::Suld1DI8Zero:
4790 Opc = NVPTX::SULD_1D_I8_ZERO;
4791 Ops.push_back(TexHandle);
4792 Ops.push_back(N->getOperand(2));
4793 Ops.push_back(Chain);
4794 break;
4795 case NVPTXISD::Suld1DI16Zero:
4796 Opc = NVPTX::SULD_1D_I16_ZERO;
4797 Ops.push_back(TexHandle);
4798 Ops.push_back(N->getOperand(2));
4799 Ops.push_back(Chain);
4800 break;
4801 case NVPTXISD::Suld1DI32Zero:
4802 Opc = NVPTX::SULD_1D_I32_ZERO;
4803 Ops.push_back(TexHandle);
4804 Ops.push_back(N->getOperand(2));
4805 Ops.push_back(Chain);
4806 break;
4807 case NVPTXISD::Suld1DI64Zero:
4808 Opc = NVPTX::SULD_1D_I64_ZERO;
4809 Ops.push_back(TexHandle);
4810 Ops.push_back(N->getOperand(2));
4811 Ops.push_back(Chain);
4812 break;
4813 case NVPTXISD::Suld1DV2I8Zero:
4814 Opc = NVPTX::SULD_1D_V2I8_ZERO;
4815 Ops.push_back(TexHandle);
4816 Ops.push_back(N->getOperand(2));
4817 Ops.push_back(Chain);
4818 break;
4819 case NVPTXISD::Suld1DV2I16Zero:
4820 Opc = NVPTX::SULD_1D_V2I16_ZERO;
4821 Ops.push_back(TexHandle);
4822 Ops.push_back(N->getOperand(2));
4823 Ops.push_back(Chain);
4824 break;
4825 case NVPTXISD::Suld1DV2I32Zero:
4826 Opc = NVPTX::SULD_1D_V2I32_ZERO;
4827 Ops.push_back(TexHandle);
4828 Ops.push_back(N->getOperand(2));
4829 Ops.push_back(Chain);
4830 break;
4831 case NVPTXISD::Suld1DV2I64Zero:
4832 Opc = NVPTX::SULD_1D_V2I64_ZERO;
4833 Ops.push_back(TexHandle);
4834 Ops.push_back(N->getOperand(2));
4835 Ops.push_back(Chain);
4836 break;
4837 case NVPTXISD::Suld1DV4I8Zero:
4838 Opc = NVPTX::SULD_1D_V4I8_ZERO;
4839 Ops.push_back(TexHandle);
4840 Ops.push_back(N->getOperand(2));
4841 Ops.push_back(Chain);
4842 break;
4843 case NVPTXISD::Suld1DV4I16Zero:
4844 Opc = NVPTX::SULD_1D_V4I16_ZERO;
4845 Ops.push_back(TexHandle);
4846 Ops.push_back(N->getOperand(2));
4847 Ops.push_back(Chain);
4848 break;
4849 case NVPTXISD::Suld1DV4I32Zero:
4850 Opc = NVPTX::SULD_1D_V4I32_ZERO;
4851 Ops.push_back(TexHandle);
4852 Ops.push_back(N->getOperand(2));
4853 Ops.push_back(Chain);
4854 break;
4855 case NVPTXISD::Suld1DArrayI8Zero:
4856 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4857 Ops.push_back(TexHandle);
4858 Ops.push_back(N->getOperand(2));
4859 Ops.push_back(N->getOperand(3));
4860 Ops.push_back(Chain);
4861 break;
4862 case NVPTXISD::Suld1DArrayI16Zero:
4863 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4864 Ops.push_back(TexHandle);
4865 Ops.push_back(N->getOperand(2));
4866 Ops.push_back(N->getOperand(3));
4867 Ops.push_back(Chain);
4868 break;
4869 case NVPTXISD::Suld1DArrayI32Zero:
4870 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4871 Ops.push_back(TexHandle);
4872 Ops.push_back(N->getOperand(2));
4873 Ops.push_back(N->getOperand(3));
4874 Ops.push_back(Chain);
4875 break;
4876 case NVPTXISD::Suld1DArrayI64Zero:
4877 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4878 Ops.push_back(TexHandle);
4879 Ops.push_back(N->getOperand(2));
4880 Ops.push_back(N->getOperand(3));
4881 Ops.push_back(Chain);
4882 break;
4883 case NVPTXISD::Suld1DArrayV2I8Zero:
4884 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4885 Ops.push_back(TexHandle);
4886 Ops.push_back(N->getOperand(2));
4887 Ops.push_back(N->getOperand(3));
4888 Ops.push_back(Chain);
4889 break;
4890 case NVPTXISD::Suld1DArrayV2I16Zero:
4891 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4892 Ops.push_back(TexHandle);
4893 Ops.push_back(N->getOperand(2));
4894 Ops.push_back(N->getOperand(3));
4895 Ops.push_back(Chain);
4896 break;
4897 case NVPTXISD::Suld1DArrayV2I32Zero:
4898 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4899 Ops.push_back(TexHandle);
4900 Ops.push_back(N->getOperand(2));
4901 Ops.push_back(N->getOperand(3));
4902 Ops.push_back(Chain);
4903 break;
4904 case NVPTXISD::Suld1DArrayV2I64Zero:
4905 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4906 Ops.push_back(TexHandle);
4907 Ops.push_back(N->getOperand(2));
4908 Ops.push_back(N->getOperand(3));
4909 Ops.push_back(Chain);
4910 break;
4911 case NVPTXISD::Suld1DArrayV4I8Zero:
4912 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4913 Ops.push_back(TexHandle);
4914 Ops.push_back(N->getOperand(2));
4915 Ops.push_back(N->getOperand(3));
4916 Ops.push_back(Chain);
4917 break;
4918 case NVPTXISD::Suld1DArrayV4I16Zero:
4919 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4920 Ops.push_back(TexHandle);
4921 Ops.push_back(N->getOperand(2));
4922 Ops.push_back(N->getOperand(3));
4923 Ops.push_back(Chain);
4924 break;
4925 case NVPTXISD::Suld1DArrayV4I32Zero:
4926 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4927 Ops.push_back(TexHandle);
4928 Ops.push_back(N->getOperand(2));
4929 Ops.push_back(N->getOperand(3));
4930 Ops.push_back(Chain);
4931 break;
4932 case NVPTXISD::Suld2DI8Zero:
4933 Opc = NVPTX::SULD_2D_I8_ZERO;
4934 Ops.push_back(TexHandle);
4935 Ops.push_back(N->getOperand(2));
4936 Ops.push_back(N->getOperand(3));
4937 Ops.push_back(Chain);
4938 break;
4939 case NVPTXISD::Suld2DI16Zero:
4940 Opc = NVPTX::SULD_2D_I16_ZERO;
4941 Ops.push_back(TexHandle);
4942 Ops.push_back(N->getOperand(2));
4943 Ops.push_back(N->getOperand(3));
4944 Ops.push_back(Chain);
4945 break;
4946 case NVPTXISD::Suld2DI32Zero:
4947 Opc = NVPTX::SULD_2D_I32_ZERO;
4948 Ops.push_back(TexHandle);
4949 Ops.push_back(N->getOperand(2));
4950 Ops.push_back(N->getOperand(3));
4951 Ops.push_back(Chain);
4952 break;
4953 case NVPTXISD::Suld2DI64Zero:
4954 Opc = NVPTX::SULD_2D_I64_ZERO;
4955 Ops.push_back(TexHandle);
4956 Ops.push_back(N->getOperand(2));
4957 Ops.push_back(N->getOperand(3));
4958 Ops.push_back(Chain);
4959 break;
4960 case NVPTXISD::Suld2DV2I8Zero:
4961 Opc = NVPTX::SULD_2D_V2I8_ZERO;
4962 Ops.push_back(TexHandle);
4963 Ops.push_back(N->getOperand(2));
4964 Ops.push_back(N->getOperand(3));
4965 Ops.push_back(Chain);
4966 break;
4967 case NVPTXISD::Suld2DV2I16Zero:
4968 Opc = NVPTX::SULD_2D_V2I16_ZERO;
4969 Ops.push_back(TexHandle);
4970 Ops.push_back(N->getOperand(2));
4971 Ops.push_back(N->getOperand(3));
4972 Ops.push_back(Chain);
4973 break;
4974 case NVPTXISD::Suld2DV2I32Zero:
4975 Opc = NVPTX::SULD_2D_V2I32_ZERO;
4976 Ops.push_back(TexHandle);
4977 Ops.push_back(N->getOperand(2));
4978 Ops.push_back(N->getOperand(3));
4979 Ops.push_back(Chain);
4980 break;
4981 case NVPTXISD::Suld2DV2I64Zero:
4982 Opc = NVPTX::SULD_2D_V2I64_ZERO;
4983 Ops.push_back(TexHandle);
4984 Ops.push_back(N->getOperand(2));
4985 Ops.push_back(N->getOperand(3));
4986 Ops.push_back(Chain);
4987 break;
4988 case NVPTXISD::Suld2DV4I8Zero:
4989 Opc = NVPTX::SULD_2D_V4I8_ZERO;
4990 Ops.push_back(TexHandle);
4991 Ops.push_back(N->getOperand(2));
4992 Ops.push_back(N->getOperand(3));
4993 Ops.push_back(Chain);
4994 break;
4995 case NVPTXISD::Suld2DV4I16Zero:
4996 Opc = NVPTX::SULD_2D_V4I16_ZERO;
4997 Ops.push_back(TexHandle);
4998 Ops.push_back(N->getOperand(2));
4999 Ops.push_back(N->getOperand(3));
5000 Ops.push_back(Chain);
5001 break;
5002 case NVPTXISD::Suld2DV4I32Zero:
5003 Opc = NVPTX::SULD_2D_V4I32_ZERO;
5004 Ops.push_back(TexHandle);
5005 Ops.push_back(N->getOperand(2));
5006 Ops.push_back(N->getOperand(3));
5007 Ops.push_back(Chain);
5008 break;
5009 case NVPTXISD::Suld2DArrayI8Zero:
5010 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
5011 Ops.push_back(TexHandle);
5012 Ops.push_back(N->getOperand(2));
5013 Ops.push_back(N->getOperand(3));
5014 Ops.push_back(N->getOperand(4));
5015 Ops.push_back(Chain);
5016 break;
5017 case NVPTXISD::Suld2DArrayI16Zero:
5018 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
5019 Ops.push_back(TexHandle);
5020 Ops.push_back(N->getOperand(2));
5021 Ops.push_back(N->getOperand(3));
5022 Ops.push_back(N->getOperand(4));
5023 Ops.push_back(Chain);
5024 break;
5025 case NVPTXISD::Suld2DArrayI32Zero:
5026 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
5027 Ops.push_back(TexHandle);
5028 Ops.push_back(N->getOperand(2));
5029 Ops.push_back(N->getOperand(3));
5030 Ops.push_back(N->getOperand(4));
5031 Ops.push_back(Chain);
5032 break;
5033 case NVPTXISD::Suld2DArrayI64Zero:
5034 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
5035 Ops.push_back(TexHandle);
5036 Ops.push_back(N->getOperand(2));
5037 Ops.push_back(N->getOperand(3));
5038 Ops.push_back(N->getOperand(4));
5039 Ops.push_back(Chain);
5040 break;
5041 case NVPTXISD::Suld2DArrayV2I8Zero:
5042 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
5043 Ops.push_back(TexHandle);
5044 Ops.push_back(N->getOperand(2));
5045 Ops.push_back(N->getOperand(3));
5046 Ops.push_back(N->getOperand(4));
5047 Ops.push_back(Chain);
5048 break;
5049 case NVPTXISD::Suld2DArrayV2I16Zero:
5050 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
5051 Ops.push_back(TexHandle);
5052 Ops.push_back(N->getOperand(2));
5053 Ops.push_back(N->getOperand(3));
5054 Ops.push_back(N->getOperand(4));
5055 Ops.push_back(Chain);
5056 break;
5057 case NVPTXISD::Suld2DArrayV2I32Zero:
5058 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
5059 Ops.push_back(TexHandle);
5060 Ops.push_back(N->getOperand(2));
5061 Ops.push_back(N->getOperand(3));
5062 Ops.push_back(N->getOperand(4));
5063 Ops.push_back(Chain);
5064 break;
5065 case NVPTXISD::Suld2DArrayV2I64Zero:
5066 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
5067 Ops.push_back(TexHandle);
5068 Ops.push_back(N->getOperand(2));
5069 Ops.push_back(N->getOperand(3));
5070 Ops.push_back(N->getOperand(4));
5071 Ops.push_back(Chain);
5072 break;
5073 case NVPTXISD::Suld2DArrayV4I8Zero:
5074 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
5075 Ops.push_back(TexHandle);
5076 Ops.push_back(N->getOperand(2));
5077 Ops.push_back(N->getOperand(3));
5078 Ops.push_back(N->getOperand(4));
5079 Ops.push_back(Chain);
5080 break;
5081 case NVPTXISD::Suld2DArrayV4I16Zero:
5082 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
5083 Ops.push_back(TexHandle);
5084 Ops.push_back(N->getOperand(2));
5085 Ops.push_back(N->getOperand(3));
5086 Ops.push_back(N->getOperand(4));
5087 Ops.push_back(Chain);
5088 break;
5089 case NVPTXISD::Suld2DArrayV4I32Zero:
5090 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
5091 Ops.push_back(TexHandle);
5092 Ops.push_back(N->getOperand(2));
5093 Ops.push_back(N->getOperand(3));
5094 Ops.push_back(N->getOperand(4));
5095 Ops.push_back(Chain);
5096 break;
5097 case NVPTXISD::Suld3DI8Zero:
5098 Opc = NVPTX::SULD_3D_I8_ZERO;
5099 Ops.push_back(TexHandle);
5100 Ops.push_back(N->getOperand(2));
5101 Ops.push_back(N->getOperand(3));
5102 Ops.push_back(N->getOperand(4));
5103 Ops.push_back(Chain);
5104 break;
5105 case NVPTXISD::Suld3DI16Zero:
5106 Opc = NVPTX::SULD_3D_I16_ZERO;
5107 Ops.push_back(TexHandle);
5108 Ops.push_back(N->getOperand(2));
5109 Ops.push_back(N->getOperand(3));
5110 Ops.push_back(N->getOperand(4));
5111 Ops.push_back(Chain);
5112 break;
5113 case NVPTXISD::Suld3DI32Zero:
5114 Opc = NVPTX::SULD_3D_I32_ZERO;
5115 Ops.push_back(TexHandle);
5116 Ops.push_back(N->getOperand(2));
5117 Ops.push_back(N->getOperand(3));
5118 Ops.push_back(N->getOperand(4));
5119 Ops.push_back(Chain);
5120 break;
5121 case NVPTXISD::Suld3DI64Zero:
5122 Opc = NVPTX::SULD_3D_I64_ZERO;
5123 Ops.push_back(TexHandle);
5124 Ops.push_back(N->getOperand(2));
5125 Ops.push_back(N->getOperand(3));
5126 Ops.push_back(N->getOperand(4));
5127 Ops.push_back(Chain);
5128 break;
5129 case NVPTXISD::Suld3DV2I8Zero:
5130 Opc = NVPTX::SULD_3D_V2I8_ZERO;
5131 Ops.push_back(TexHandle);
5132 Ops.push_back(N->getOperand(2));
5133 Ops.push_back(N->getOperand(3));
5134 Ops.push_back(N->getOperand(4));
5135 Ops.push_back(Chain);
5136 break;
5137 case NVPTXISD::Suld3DV2I16Zero:
5138 Opc = NVPTX::SULD_3D_V2I16_ZERO;
5139 Ops.push_back(TexHandle);
5140 Ops.push_back(N->getOperand(2));
5141 Ops.push_back(N->getOperand(3));
5142 Ops.push_back(N->getOperand(4));
5143 Ops.push_back(Chain);
5144 break;
5145 case NVPTXISD::Suld3DV2I32Zero:
5146 Opc = NVPTX::SULD_3D_V2I32_ZERO;
5147 Ops.push_back(TexHandle);
5148 Ops.push_back(N->getOperand(2));
5149 Ops.push_back(N->getOperand(3));
5150 Ops.push_back(N->getOperand(4));
5151 Ops.push_back(Chain);
5152 break;
5153 case NVPTXISD::Suld3DV2I64Zero:
5154 Opc = NVPTX::SULD_3D_V2I64_ZERO;
5155 Ops.push_back(TexHandle);
5156 Ops.push_back(N->getOperand(2));
5157 Ops.push_back(N->getOperand(3));
5158 Ops.push_back(N->getOperand(4));
5159 Ops.push_back(Chain);
5160 break;
5161 case NVPTXISD::Suld3DV4I8Zero:
5162 Opc = NVPTX::SULD_3D_V4I8_ZERO;
5163 Ops.push_back(TexHandle);
5164 Ops.push_back(N->getOperand(2));
5165 Ops.push_back(N->getOperand(3));
5166 Ops.push_back(N->getOperand(4));
5167 Ops.push_back(Chain);
5168 break;
5169 case NVPTXISD::Suld3DV4I16Zero:
5170 Opc = NVPTX::SULD_3D_V4I16_ZERO;
5171 Ops.push_back(TexHandle);
5172 Ops.push_back(N->getOperand(2));
5173 Ops.push_back(N->getOperand(3));
5174 Ops.push_back(N->getOperand(4));
5175 Ops.push_back(Chain);
5176 break;
5177 case NVPTXISD::Suld3DV4I32Zero:
5178 Opc = NVPTX::SULD_3D_V4I32_ZERO;
5179 Ops.push_back(TexHandle);
5180 Ops.push_back(N->getOperand(2));
5181 Ops.push_back(N->getOperand(3));
5182 Ops.push_back(N->getOperand(4));
5183 Ops.push_back(Chain);
5184 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00005185 }
Justin Bogner8d83fb62016-05-13 21:12:53 +00005186 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
5187 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +00005188}
5189
Justin Holewinski9a2350e2014-07-17 11:59:04 +00005190
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005191/// SelectBFE - Look for instruction sequences that can be made more efficient
5192/// by using the 'bfe' (bit-field extract) PTX instruction
Justin Bogner8d83fb62016-05-13 21:12:53 +00005193bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005194 SDLoc DL(N);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005195 SDValue LHS = N->getOperand(0);
5196 SDValue RHS = N->getOperand(1);
5197 SDValue Len;
5198 SDValue Start;
5199 SDValue Val;
5200 bool IsSigned = false;
5201
5202 if (N->getOpcode() == ISD::AND) {
5203 // Canonicalize the operands
5204 // We want 'and %val, %mask'
5205 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
5206 std::swap(LHS, RHS);
5207 }
5208
5209 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
5210 if (!Mask) {
5211 // We need a constant mask on the RHS of the AND
Justin Bogner8d83fb62016-05-13 21:12:53 +00005212 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005213 }
5214
5215 // Extract the mask bits
5216 uint64_t MaskVal = Mask->getZExtValue();
5217 if (!isMask_64(MaskVal)) {
5218 // We *could* handle shifted masks here, but doing so would require an
5219 // 'and' operation to fix up the low-order bits so we would trade
5220 // shr+and for bfe+and, which has the same throughput
Justin Bogner8d83fb62016-05-13 21:12:53 +00005221 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005222 }
5223
5224 // How many bits are in our mask?
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00005225 uint64_t NumBits = countTrailingOnes(MaskVal);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005226 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005227
5228 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
5229 // We have a 'srl/and' pair, extract the effective start bit and length
5230 Val = LHS.getNode()->getOperand(0);
5231 Start = LHS.getNode()->getOperand(1);
5232 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
5233 if (StartConst) {
5234 uint64_t StartVal = StartConst->getZExtValue();
5235 // How many "good" bits do we have left? "good" is defined here as bits
5236 // that exist in the original value, not shifted in.
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00005237 uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005238 if (NumBits > GoodBits) {
5239 // Do not handle the case where bits have been shifted in. In theory
5240 // we could handle this, but the cost is likely higher than just
5241 // emitting the srl/and pair.
Justin Bogner8d83fb62016-05-13 21:12:53 +00005242 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005243 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005244 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005245 } else {
5246 // Do not handle the case where the shift amount (can be zero if no srl
5247 // was found) is not constant. We could handle this case, but it would
5248 // require run-time logic that would be more expensive than just
5249 // emitting the srl/and pair.
Justin Bogner8d83fb62016-05-13 21:12:53 +00005250 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005251 }
5252 } else {
5253 // Do not handle the case where the LHS of the and is not a shift. While
5254 // it would be trivial to handle this case, it would just transform
5255 // 'and' -> 'bfe', but 'and' has higher-throughput.
Justin Bogner8d83fb62016-05-13 21:12:53 +00005256 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005257 }
5258 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
5259 if (LHS->getOpcode() == ISD::AND) {
5260 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
5261 if (!ShiftCnst) {
5262 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00005263 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005264 }
5265
5266 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
5267
5268 SDValue AndLHS = LHS->getOperand(0);
5269 SDValue AndRHS = LHS->getOperand(1);
5270
5271 // Canonicalize the AND to have the mask on the RHS
5272 if (isa<ConstantSDNode>(AndLHS)) {
5273 std::swap(AndLHS, AndRHS);
5274 }
5275
5276 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
5277 if (!MaskCnst) {
5278 // Mask must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00005279 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005280 }
5281
5282 uint64_t MaskVal = MaskCnst->getZExtValue();
5283 uint64_t NumZeros;
5284 uint64_t NumBits;
5285 if (isMask_64(MaskVal)) {
5286 NumZeros = 0;
5287 // The number of bits in the result bitfield will be the number of
5288 // trailing ones (the AND) minus the number of bits we shift off
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00005289 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005290 } else if (isShiftedMask_64(MaskVal)) {
5291 NumZeros = countTrailingZeros(MaskVal);
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00005292 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005293 // The number of bits in the result bitfield will be the number of
5294 // trailing zeros plus the number of set bits in the mask minus the
5295 // number of bits we shift off
5296 NumBits = NumZeros + NumOnes - ShiftAmt;
5297 } else {
5298 // This is not a mask we can handle
Justin Bogner8d83fb62016-05-13 21:12:53 +00005299 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005300 }
5301
5302 if (ShiftAmt < NumZeros) {
5303 // Handling this case would require extra logic that would make this
5304 // transformation non-profitable
Justin Bogner8d83fb62016-05-13 21:12:53 +00005305 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005306 }
5307
5308 Val = AndLHS;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005309 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
5310 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005311 } else if (LHS->getOpcode() == ISD::SHL) {
5312 // Here, we have a pattern like:
5313 //
5314 // (sra (shl val, NN), MM)
5315 // or
5316 // (srl (shl val, NN), MM)
5317 //
5318 // If MM >= NN, we can efficiently optimize this with bfe
5319 Val = LHS->getOperand(0);
5320
5321 SDValue ShlRHS = LHS->getOperand(1);
5322 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
5323 if (!ShlCnst) {
5324 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00005325 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005326 }
5327 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
5328
5329 SDValue ShrRHS = RHS;
5330 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
5331 if (!ShrCnst) {
5332 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00005333 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005334 }
5335 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
5336
5337 // To avoid extra codegen and be profitable, we need Outer >= Inner
5338 if (OuterShiftAmt < InnerShiftAmt) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00005339 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005340 }
5341
5342 // If the outer shift is more than the type size, we have no bitfield to
5343 // extract (since we also check that the inner shift is <= the outer shift
5344 // then this also implies that the inner shift is < the type size)
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00005345 if (OuterShiftAmt >= Val.getValueSizeInBits()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00005346 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005347 }
5348
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00005349 Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
5350 MVT::i32);
5351 Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
5352 DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005353
5354 if (N->getOpcode() == ISD::SRA) {
5355 // If we have a arithmetic right shift, we need to use the signed bfe
5356 // variant
5357 IsSigned = true;
5358 }
5359 } else {
5360 // No can do...
Justin Bogner8d83fb62016-05-13 21:12:53 +00005361 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005362 }
5363 } else {
5364 // No can do...
Justin Bogner8d83fb62016-05-13 21:12:53 +00005365 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005366 }
5367
5368
5369 unsigned Opc;
5370 // For the BFE operations we form here from "and" and "srl", always use the
5371 // unsigned variants.
5372 if (Val.getValueType() == MVT::i32) {
5373 if (IsSigned) {
5374 Opc = NVPTX::BFE_S32rii;
5375 } else {
5376 Opc = NVPTX::BFE_U32rii;
5377 }
5378 } else if (Val.getValueType() == MVT::i64) {
5379 if (IsSigned) {
5380 Opc = NVPTX::BFE_S64rii;
5381 } else {
5382 Opc = NVPTX::BFE_U64rii;
5383 }
5384 } else {
5385 // We cannot handle this type
Justin Bogner8d83fb62016-05-13 21:12:53 +00005386 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005387 }
5388
5389 SDValue Ops[] = {
5390 Val, Start, Len
5391 };
5392
Justin Bogner8d83fb62016-05-13 21:12:53 +00005393 ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
5394 return true;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005395}
5396
Justin Holewinskiae556d32012-05-04 20:18:50 +00005397// SelectDirectAddr - Match a direct address for DAG.
5398// A direct address could be a globaladdress or externalsymbol.
5399bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
5400 // Return true if TGA or ES.
Justin Holewinski0497ab12013-03-30 14:29:21 +00005401 if (N.getOpcode() == ISD::TargetGlobalAddress ||
5402 N.getOpcode() == ISD::TargetExternalSymbol) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00005403 Address = N;
5404 return true;
5405 }
5406 if (N.getOpcode() == NVPTXISD::Wrapper) {
5407 Address = N.getOperand(0);
5408 return true;
5409 }
Artem Belevichb2e76a52016-07-20 18:39:47 +00005410 // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
5411 if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
5412 if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
5413 CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
5414 CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
5415 return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
Justin Holewinskiae556d32012-05-04 20:18:50 +00005416 }
5417 return false;
5418}
5419
5420// symbol+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00005421bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
5422 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00005423 if (Addr.getOpcode() == ISD::ADD) {
5424 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00005425 SDValue base = Addr.getOperand(0);
Justin Holewinskiae556d32012-05-04 20:18:50 +00005426 if (SelectDirectAddr(base, Base)) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005427 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5428 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00005429 return true;
5430 }
5431 }
5432 }
5433 return false;
5434}
5435
5436// symbol+offset
5437bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
5438 SDValue &Base, SDValue &Offset) {
5439 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
5440}
5441
5442// symbol+offset
5443bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
5444 SDValue &Base, SDValue &Offset) {
5445 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
5446}
5447
5448// register+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00005449bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5450 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00005451 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5452 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005453 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00005454 return true;
5455 }
5456 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5457 Addr.getOpcode() == ISD::TargetGlobalAddress)
Justin Holewinski0497ab12013-03-30 14:29:21 +00005458 return false; // direct calls.
Justin Holewinskiae556d32012-05-04 20:18:50 +00005459
5460 if (Addr.getOpcode() == ISD::ADD) {
5461 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5462 return false;
5463 }
5464 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5465 if (FrameIndexSDNode *FIN =
Justin Holewinski0497ab12013-03-30 14:29:21 +00005466 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
Justin Holewinskiae556d32012-05-04 20:18:50 +00005467 // Constant offset from frame ref.
5468 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5469 else
5470 Base = Addr.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005471 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5472 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00005473 return true;
5474 }
5475 }
5476 return false;
5477}
5478
5479// register+offset
5480bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5481 SDValue &Base, SDValue &Offset) {
5482 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
5483}
5484
5485// register+offset
5486bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5487 SDValue &Base, SDValue &Offset) {
5488 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
5489}
5490
5491bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5492 unsigned int spN) const {
Craig Topper062a2ba2014-04-25 05:30:21 +00005493 const Value *Src = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00005494 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +00005495 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5496 return true;
5497 Src = mN->getMemOperand()->getValue();
Justin Holewinskiae556d32012-05-04 20:18:50 +00005498 }
5499 if (!Src)
5500 return false;
Craig Toppere3dcce92015-08-01 22:20:21 +00005501 if (auto *PT = dyn_cast<PointerType>(Src->getType()))
Justin Holewinskiae556d32012-05-04 20:18:50 +00005502 return (PT->getAddressSpace() == spN);
5503 return false;
5504}
5505
5506/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5507/// inline asm expressions.
Justin Holewinski0497ab12013-03-30 14:29:21 +00005508bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
Daniel Sanders60f1db02015-03-13 12:45:09 +00005509 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00005510 SDValue Op0, Op1;
Daniel Sanders60f1db02015-03-13 12:45:09 +00005511 switch (ConstraintID) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00005512 default:
5513 return true;
Daniel Sanders60f1db02015-03-13 12:45:09 +00005514 case InlineAsm::Constraint_m: // memory
Justin Holewinskiae556d32012-05-04 20:18:50 +00005515 if (SelectDirectAddr(Op, Op0)) {
5516 OutOps.push_back(Op0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005517 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
Justin Holewinskiae556d32012-05-04 20:18:50 +00005518 return false;
5519 }
5520 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5521 OutOps.push_back(Op0);
5522 OutOps.push_back(Op1);
5523 return false;
5524 }
5525 break;
5526 }
5527 return true;
5528}
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00005529
5530/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
5531/// conversion from \p SrcTy to \p DestTy.
5532unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
5533 bool IsSigned) {
5534 switch (SrcTy.SimpleTy) {
5535 default:
5536 llvm_unreachable("Unhandled source type");
5537 case MVT::i8:
5538 switch (DestTy.SimpleTy) {
5539 default:
5540 llvm_unreachable("Unhandled dest type");
5541 case MVT::i16:
5542 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
5543 case MVT::i32:
5544 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
5545 case MVT::i64:
5546 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
5547 }
5548 case MVT::i16:
5549 switch (DestTy.SimpleTy) {
5550 default:
5551 llvm_unreachable("Unhandled dest type");
5552 case MVT::i8:
5553 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
5554 case MVT::i32:
5555 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
5556 case MVT::i64:
5557 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
5558 }
5559 case MVT::i32:
5560 switch (DestTy.SimpleTy) {
5561 default:
5562 llvm_unreachable("Unhandled dest type");
5563 case MVT::i8:
5564 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
5565 case MVT::i16:
5566 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
5567 case MVT::i64:
5568 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
5569 }
5570 case MVT::i64:
5571 switch (DestTy.SimpleTy) {
5572 default:
5573 llvm_unreachable("Unhandled dest type");
5574 case MVT::i8:
5575 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
5576 case MVT::i16:
5577 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
5578 case MVT::i32:
5579 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
5580 }
5581 }
5582}