blob: 0761ff4cbd317594dea693ffc24287bd4aea3084 [file] [log] [blame]
//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//
14
15#include "AMDGPUISelLowering.h"
16#include "AMDGPURegisterInfo.h"
Chandler Carruth58a2cbe2013-01-02 10:22:59 +000017#include "AMDGPUSubtarget.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000018#include "AMDILIntrinsicInfo.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000019#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/CodeGen/PseudoSourceValue.h"
22#include "llvm/CodeGen/SelectionDAG.h"
23#include "llvm/CodeGen/SelectionDAGNodes.h"
24#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
Chandler Carruth0b8c9a82013-01-02 11:36:10 +000025#include "llvm/IR/CallingConv.h"
26#include "llvm/IR/DerivedTypes.h"
27#include "llvm/IR/Instructions.h"
28#include "llvm/IR/Intrinsics.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000029#include "llvm/Support/raw_ostream.h"
30#include "llvm/Target/TargetInstrInfo.h"
31#include "llvm/Target/TargetOptions.h"
32
33using namespace llvm;
34//===----------------------------------------------------------------------===//
Tom Stellardf98f2ce2012-12-11 21:25:42 +000035// TargetLowering Implementation Help Functions End
36//===----------------------------------------------------------------------===//
37
38//===----------------------------------------------------------------------===//
39// TargetLowering Class Implementation Begins
40//===----------------------------------------------------------------------===//
41void AMDGPUTargetLowering::InitAMDILLowering() {
Craig Topper787e71d2013-07-15 06:39:13 +000042 static const int types[] = {
Tom Stellardf98f2ce2012-12-11 21:25:42 +000043 (int)MVT::i8,
44 (int)MVT::i16,
45 (int)MVT::i32,
46 (int)MVT::f32,
47 (int)MVT::f64,
48 (int)MVT::i64,
49 (int)MVT::v2i8,
50 (int)MVT::v4i8,
51 (int)MVT::v2i16,
52 (int)MVT::v4i16,
53 (int)MVT::v4f32,
54 (int)MVT::v4i32,
55 (int)MVT::v2f32,
56 (int)MVT::v2i32,
57 (int)MVT::v2f64,
58 (int)MVT::v2i64
59 };
60
Craig Topper787e71d2013-07-15 06:39:13 +000061 static const int IntTypes[] = {
Tom Stellardf98f2ce2012-12-11 21:25:42 +000062 (int)MVT::i8,
63 (int)MVT::i16,
64 (int)MVT::i32,
65 (int)MVT::i64
66 };
67
Craig Topper787e71d2013-07-15 06:39:13 +000068 static const int FloatTypes[] = {
Tom Stellardf98f2ce2012-12-11 21:25:42 +000069 (int)MVT::f32,
70 (int)MVT::f64
71 };
72
Craig Topper787e71d2013-07-15 06:39:13 +000073 static const int VectorTypes[] = {
Tom Stellardf98f2ce2012-12-11 21:25:42 +000074 (int)MVT::v2i8,
75 (int)MVT::v4i8,
76 (int)MVT::v2i16,
77 (int)MVT::v4i16,
78 (int)MVT::v4f32,
79 (int)MVT::v4i32,
80 (int)MVT::v2f32,
81 (int)MVT::v2i32,
82 (int)MVT::v2f64,
83 (int)MVT::v2i64
84 };
Craig Topperb9df53a2013-07-15 04:27:47 +000085 const size_t NumTypes = array_lengthof(types);
86 const size_t NumFloatTypes = array_lengthof(FloatTypes);
87 const size_t NumIntTypes = array_lengthof(IntTypes);
88 const size_t NumVectorTypes = array_lengthof(VectorTypes);
Tom Stellardf98f2ce2012-12-11 21:25:42 +000089
90 const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
91 // These are the current register classes that are
92 // supported
93
94 for (unsigned int x = 0; x < NumTypes; ++x) {
95 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
96
Tom Stellardf98f2ce2012-12-11 21:25:42 +000097 setOperationAction(ISD::SUBE, VT, Expand);
98 setOperationAction(ISD::SUBC, VT, Expand);
99 setOperationAction(ISD::ADDE, VT, Expand);
100 setOperationAction(ISD::ADDC, VT, Expand);
101 setOperationAction(ISD::BRCOND, VT, Custom);
102 setOperationAction(ISD::BR_JT, VT, Expand);
103 setOperationAction(ISD::BRIND, VT, Expand);
104 // TODO: Implement custom UREM/SREM routines
105 setOperationAction(ISD::SREM, VT, Expand);
106 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
107 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
108 if (VT != MVT::i64 && VT != MVT::v2i64) {
109 setOperationAction(ISD::SDIV, VT, Custom);
110 }
111 }
112 for (unsigned int x = 0; x < NumFloatTypes; ++x) {
113 MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
114
115 // IL does not have these operations for floating point types
116 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
117 setOperationAction(ISD::SETOLT, VT, Expand);
118 setOperationAction(ISD::SETOGE, VT, Expand);
119 setOperationAction(ISD::SETOGT, VT, Expand);
120 setOperationAction(ISD::SETOLE, VT, Expand);
121 setOperationAction(ISD::SETULT, VT, Expand);
122 setOperationAction(ISD::SETUGE, VT, Expand);
123 setOperationAction(ISD::SETUGT, VT, Expand);
124 setOperationAction(ISD::SETULE, VT, Expand);
125 }
126
127 for (unsigned int x = 0; x < NumIntTypes; ++x) {
128 MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
129
130 // GPU also does not have divrem function for signed or unsigned
131 setOperationAction(ISD::SDIVREM, VT, Expand);
132
133 // GPU does not have [S|U]MUL_LOHI functions as a single instruction
134 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
135 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
136
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000137 setOperationAction(ISD::BSWAP, VT, Expand);
138
139 // GPU doesn't have any counting operators
140 setOperationAction(ISD::CTPOP, VT, Expand);
141 setOperationAction(ISD::CTTZ, VT, Expand);
142 setOperationAction(ISD::CTLZ, VT, Expand);
143 }
144
145 for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
146 MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
147
148 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
149 setOperationAction(ISD::SDIVREM, VT, Expand);
150 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
151 // setOperationAction(ISD::VSETCC, VT, Expand);
152 setOperationAction(ISD::SELECT_CC, VT, Expand);
153
154 }
Tom Stellard3ff0abf2013-06-07 20:37:48 +0000155 setOperationAction(ISD::MULHU, MVT::i64, Expand);
156 setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
157 setOperationAction(ISD::MULHS, MVT::i64, Expand);
158 setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
159 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
160 setOperationAction(ISD::SREM, MVT::v2i64, Expand);
161 setOperationAction(ISD::Constant , MVT::i64 , Legal);
162 setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
163 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
164 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
165 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
166 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
167 if (STM.hasHWFP64()) {
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000168 // we support loading/storing v2f64 but not operations on the type
169 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
170 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
171 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
172 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
173 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
174 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
175 // We want to expand vector conversions into their scalar
176 // counterparts.
177 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
178 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
179 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
180 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
181 setOperationAction(ISD::FABS, MVT::f64, Expand);
182 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
183 }
184 // TODO: Fix the UDIV24 algorithm so it works for these
185 // types correctly. This needs vector comparisons
186 // for this to work correctly.
187 setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
188 setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
189 setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
190 setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000191 setOperationAction(ISD::SUBC, MVT::Other, Expand);
192 setOperationAction(ISD::ADDE, MVT::Other, Expand);
193 setOperationAction(ISD::ADDC, MVT::Other, Expand);
194 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
195 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
196 setOperationAction(ISD::BRIND, MVT::Other, Expand);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000197
198
199 // Use the default implementation.
200 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
201 setOperationAction(ISD::Constant , MVT::i32 , Legal);
202
203 setSchedulingPreference(Sched::RegPressure);
204 setPow2DivIsCheap(false);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000205 setSelectIsExpensive(true);
206 setJumpIsExpensive(true);
207
Jim Grosbach64f3e762013-02-20 21:31:28 +0000208 MaxStoresPerMemcpy = 4096;
209 MaxStoresPerMemmove = 4096;
210 MaxStoresPerMemset = 4096;
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000211
212}
213
214bool
215AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
216 const CallInst &I, unsigned Intrinsic) const {
217 return false;
218}
219
220// The backend supports 32 and 64 bit floating point immediates
221bool
222AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
223 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
224 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
225 return true;
226 } else {
227 return false;
228 }
229}
230
231bool
232AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
233 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
234 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
235 return false;
236 } else {
237 return true;
238 }
239}
240
241
242// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
243// be zero. Op is expected to be a target specific node. Used by DAG
244// combiner.
245
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000246//===----------------------------------------------------------------------===//
247// Other Lowering Hooks
248//===----------------------------------------------------------------------===//
249
250SDValue
251AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
252 EVT OVT = Op.getValueType();
253 SDValue DST;
254 if (OVT.getScalarType() == MVT::i64) {
255 DST = LowerSDIV64(Op, DAG);
256 } else if (OVT.getScalarType() == MVT::i32) {
257 DST = LowerSDIV32(Op, DAG);
258 } else if (OVT.getScalarType() == MVT::i16
259 || OVT.getScalarType() == MVT::i8) {
260 DST = LowerSDIV24(Op, DAG);
261 } else {
262 DST = SDValue(Op.getNode(), 0);
263 }
264 return DST;
265}
266
267SDValue
268AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
269 EVT OVT = Op.getValueType();
270 SDValue DST;
271 if (OVT.getScalarType() == MVT::i64) {
272 DST = LowerSREM64(Op, DAG);
273 } else if (OVT.getScalarType() == MVT::i32) {
274 DST = LowerSREM32(Op, DAG);
275 } else if (OVT.getScalarType() == MVT::i16) {
276 DST = LowerSREM16(Op, DAG);
277 } else if (OVT.getScalarType() == MVT::i8) {
278 DST = LowerSREM8(Op, DAG);
279 } else {
280 DST = SDValue(Op.getNode(), 0);
281 }
282 return DST;
283}
284
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000285EVT
286AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
287 int iSize = (size * numEle);
288 int vEle = (iSize >> ((size == 64) ? 6 : 5));
289 if (!vEle) {
290 vEle = 1;
291 }
292 if (size == 64) {
293 if (vEle == 1) {
294 return EVT(MVT::i64);
295 } else {
296 return EVT(MVT::getVectorVT(MVT::i64, vEle));
297 }
298 } else {
299 if (vEle == 1) {
300 return EVT(MVT::i32);
301 } else {
302 return EVT(MVT::getVectorVT(MVT::i32, vEle));
303 }
304 }
305}
306
307SDValue
308AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
309 SDValue Chain = Op.getOperand(0);
310 SDValue Cond = Op.getOperand(1);
311 SDValue Jump = Op.getOperand(2);
312 SDValue Result;
313 Result = DAG.getNode(
314 AMDGPUISD::BRANCH_COND,
Andrew Trickac6d9be2013-05-25 02:42:55 +0000315 SDLoc(Op),
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000316 Op.getValueType(),
317 Chain, Jump, Cond);
318 return Result;
319}
320
321SDValue
322AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000323 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000324 EVT OVT = Op.getValueType();
325 SDValue LHS = Op.getOperand(0);
326 SDValue RHS = Op.getOperand(1);
327 MVT INTTY;
328 MVT FLTTY;
329 if (!OVT.isVector()) {
330 INTTY = MVT::i32;
331 FLTTY = MVT::f32;
332 } else if (OVT.getVectorNumElements() == 2) {
333 INTTY = MVT::v2i32;
334 FLTTY = MVT::v2f32;
335 } else if (OVT.getVectorNumElements() == 4) {
336 INTTY = MVT::v4i32;
337 FLTTY = MVT::v4f32;
338 }
339 unsigned bitsize = OVT.getScalarType().getSizeInBits();
340 // char|short jq = ia ^ ib;
341 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
342
343 // jq = jq >> (bitsize - 2)
344 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
345
346 // jq = jq | 0x1
347 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
348
349 // jq = (int)jq
350 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
351
352 // int ia = (int)LHS;
353 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
354
355 // int ib, (int)RHS;
356 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
357
358 // float fa = (float)ia;
359 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
360
361 // float fb = (float)ib;
362 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
363
364 // float fq = native_divide(fa, fb);
365 SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
366
367 // fq = trunc(fq);
368 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
369
370 // float fqneg = -fq;
371 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
372
373 // float fr = mad(fqneg, fb, fa);
Vincent Lejeunee3111962013-02-18 14:11:28 +0000374 SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
375 DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000376
377 // int iq = (int)fq;
378 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
379
380 // fr = fabs(fr);
381 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
382
383 // fb = fabs(fb);
384 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
385
386 // int cv = fr >= fb;
387 SDValue cv;
388 if (INTTY == MVT::i32) {
389 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
390 } else {
391 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
392 }
393 // jq = (cv ? jq : 0);
394 jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
395 DAG.getConstant(0, OVT));
396 // dst = iq + jq;
397 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
398 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
399 return iq;
400}
401
402SDValue
403AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000404 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000405 EVT OVT = Op.getValueType();
406 SDValue LHS = Op.getOperand(0);
407 SDValue RHS = Op.getOperand(1);
408 // The LowerSDIV32 function generates equivalent to the following IL.
409 // mov r0, LHS
410 // mov r1, RHS
411 // ilt r10, r0, 0
412 // ilt r11, r1, 0
413 // iadd r0, r0, r10
414 // iadd r1, r1, r11
415 // ixor r0, r0, r10
416 // ixor r1, r1, r11
417 // udiv r0, r0, r1
418 // ixor r10, r10, r11
419 // iadd r0, r0, r10
420 // ixor DST, r0, r10
421
422 // mov r0, LHS
423 SDValue r0 = LHS;
424
425 // mov r1, RHS
426 SDValue r1 = RHS;
427
428 // ilt r10, r0, 0
429 SDValue r10 = DAG.getSelectCC(DL,
430 r0, DAG.getConstant(0, OVT),
431 DAG.getConstant(-1, MVT::i32),
432 DAG.getConstant(0, MVT::i32),
433 ISD::SETLT);
434
435 // ilt r11, r1, 0
436 SDValue r11 = DAG.getSelectCC(DL,
437 r1, DAG.getConstant(0, OVT),
438 DAG.getConstant(-1, MVT::i32),
439 DAG.getConstant(0, MVT::i32),
440 ISD::SETLT);
441
442 // iadd r0, r0, r10
443 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
444
445 // iadd r1, r1, r11
446 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
447
448 // ixor r0, r0, r10
449 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
450
451 // ixor r1, r1, r11
452 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
453
454 // udiv r0, r0, r1
455 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
456
457 // ixor r10, r10, r11
458 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
459
460 // iadd r0, r0, r10
461 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
462
463 // ixor DST, r0, r10
464 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
465 return DST;
466}
467
468SDValue
469AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
470 return SDValue(Op.getNode(), 0);
471}
472
473SDValue
474AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000475 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000476 EVT OVT = Op.getValueType();
477 MVT INTTY = MVT::i32;
478 if (OVT == MVT::v2i8) {
479 INTTY = MVT::v2i32;
480 } else if (OVT == MVT::v4i8) {
481 INTTY = MVT::v4i32;
482 }
483 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
484 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
485 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
486 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
487 return LHS;
488}
489
490SDValue
491AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000492 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000493 EVT OVT = Op.getValueType();
494 MVT INTTY = MVT::i32;
495 if (OVT == MVT::v2i16) {
496 INTTY = MVT::v2i32;
497 } else if (OVT == MVT::v4i16) {
498 INTTY = MVT::v4i32;
499 }
500 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
501 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
502 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
503 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
504 return LHS;
505}
506
507SDValue
508AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000509 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000510 EVT OVT = Op.getValueType();
511 SDValue LHS = Op.getOperand(0);
512 SDValue RHS = Op.getOperand(1);
513 // The LowerSREM32 function generates equivalent to the following IL.
514 // mov r0, LHS
515 // mov r1, RHS
516 // ilt r10, r0, 0
517 // ilt r11, r1, 0
518 // iadd r0, r0, r10
519 // iadd r1, r1, r11
520 // ixor r0, r0, r10
521 // ixor r1, r1, r11
522 // udiv r20, r0, r1
523 // umul r20, r20, r1
524 // sub r0, r0, r20
525 // iadd r0, r0, r10
526 // ixor DST, r0, r10
527
528 // mov r0, LHS
529 SDValue r0 = LHS;
530
531 // mov r1, RHS
532 SDValue r1 = RHS;
533
534 // ilt r10, r0, 0
535 SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
536
537 // ilt r11, r1, 0
538 SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
539
540 // iadd r0, r0, r10
541 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
542
543 // iadd r1, r1, r11
544 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
545
546 // ixor r0, r0, r10
547 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
548
549 // ixor r1, r1, r11
550 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
551
552 // udiv r20, r0, r1
553 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
554
555 // umul r20, r20, r1
556 r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
557
558 // sub r0, r0, r20
559 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
560
561 // iadd r0, r0, r10
562 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
563
564 // ixor DST, r0, r10
565 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
566 return DST;
567}
568
569SDValue
570AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
571 return SDValue(Op.getNode(), 0);
572}