blob: d669966cce23d4943d9db0ee2ce0ce6c1d4827fc [file] [log] [blame]
Tom Stellardf98f2ce2012-12-11 21:25:42 +00001//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief TargetLowering functions borrowed from AMDIL.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUISelLowering.h"
16#include "AMDGPURegisterInfo.h"
Chandler Carruth58a2cbe2013-01-02 10:22:59 +000017#include "AMDGPUSubtarget.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000018#include "AMDILIntrinsicInfo.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000019#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/CodeGen/PseudoSourceValue.h"
22#include "llvm/CodeGen/SelectionDAG.h"
23#include "llvm/CodeGen/SelectionDAGNodes.h"
24#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
Chandler Carruth0b8c9a82013-01-02 11:36:10 +000025#include "llvm/IR/CallingConv.h"
26#include "llvm/IR/DerivedTypes.h"
27#include "llvm/IR/Instructions.h"
28#include "llvm/IR/Intrinsics.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000029#include "llvm/Support/raw_ostream.h"
30#include "llvm/Target/TargetInstrInfo.h"
31#include "llvm/Target/TargetOptions.h"
32
33using namespace llvm;
34//===----------------------------------------------------------------------===//
Tom Stellardf98f2ce2012-12-11 21:25:42 +000035// TargetLowering Implementation Help Functions End
36//===----------------------------------------------------------------------===//
37
38//===----------------------------------------------------------------------===//
39// TargetLowering Class Implementation Begins
40//===----------------------------------------------------------------------===//
41void AMDGPUTargetLowering::InitAMDILLowering() {
42 int types[] = {
43 (int)MVT::i8,
44 (int)MVT::i16,
45 (int)MVT::i32,
46 (int)MVT::f32,
47 (int)MVT::f64,
48 (int)MVT::i64,
49 (int)MVT::v2i8,
50 (int)MVT::v4i8,
51 (int)MVT::v2i16,
52 (int)MVT::v4i16,
53 (int)MVT::v4f32,
54 (int)MVT::v4i32,
55 (int)MVT::v2f32,
56 (int)MVT::v2i32,
57 (int)MVT::v2f64,
58 (int)MVT::v2i64
59 };
60
61 int IntTypes[] = {
62 (int)MVT::i8,
63 (int)MVT::i16,
64 (int)MVT::i32,
65 (int)MVT::i64
66 };
67
68 int FloatTypes[] = {
69 (int)MVT::f32,
70 (int)MVT::f64
71 };
72
73 int VectorTypes[] = {
74 (int)MVT::v2i8,
75 (int)MVT::v4i8,
76 (int)MVT::v2i16,
77 (int)MVT::v4i16,
78 (int)MVT::v4f32,
79 (int)MVT::v4i32,
80 (int)MVT::v2f32,
81 (int)MVT::v2i32,
82 (int)MVT::v2f64,
83 (int)MVT::v2i64
84 };
85 size_t NumTypes = sizeof(types) / sizeof(*types);
86 size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
87 size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
88 size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
89
90 const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
91 // These are the current register classes that are
92 // supported
93
94 for (unsigned int x = 0; x < NumTypes; ++x) {
95 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
96
97 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
98 // We cannot sextinreg, expand to shifts
99 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
100 setOperationAction(ISD::SUBE, VT, Expand);
101 setOperationAction(ISD::SUBC, VT, Expand);
102 setOperationAction(ISD::ADDE, VT, Expand);
103 setOperationAction(ISD::ADDC, VT, Expand);
104 setOperationAction(ISD::BRCOND, VT, Custom);
105 setOperationAction(ISD::BR_JT, VT, Expand);
106 setOperationAction(ISD::BRIND, VT, Expand);
107 // TODO: Implement custom UREM/SREM routines
108 setOperationAction(ISD::SREM, VT, Expand);
109 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
110 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
111 if (VT != MVT::i64 && VT != MVT::v2i64) {
112 setOperationAction(ISD::SDIV, VT, Custom);
113 }
114 }
115 for (unsigned int x = 0; x < NumFloatTypes; ++x) {
116 MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
117
118 // IL does not have these operations for floating point types
119 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
120 setOperationAction(ISD::SETOLT, VT, Expand);
121 setOperationAction(ISD::SETOGE, VT, Expand);
122 setOperationAction(ISD::SETOGT, VT, Expand);
123 setOperationAction(ISD::SETOLE, VT, Expand);
124 setOperationAction(ISD::SETULT, VT, Expand);
125 setOperationAction(ISD::SETUGE, VT, Expand);
126 setOperationAction(ISD::SETUGT, VT, Expand);
127 setOperationAction(ISD::SETULE, VT, Expand);
128 }
129
130 for (unsigned int x = 0; x < NumIntTypes; ++x) {
131 MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
132
133 // GPU also does not have divrem function for signed or unsigned
134 setOperationAction(ISD::SDIVREM, VT, Expand);
135
136 // GPU does not have [S|U]MUL_LOHI functions as a single instruction
137 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
138 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
139
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000140 setOperationAction(ISD::BSWAP, VT, Expand);
141
142 // GPU doesn't have any counting operators
143 setOperationAction(ISD::CTPOP, VT, Expand);
144 setOperationAction(ISD::CTTZ, VT, Expand);
145 setOperationAction(ISD::CTLZ, VT, Expand);
146 }
147
148 for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
149 MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
150
151 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
152 setOperationAction(ISD::SDIVREM, VT, Expand);
153 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
154 // setOperationAction(ISD::VSETCC, VT, Expand);
155 setOperationAction(ISD::SELECT_CC, VT, Expand);
156
157 }
Tom Stellard3ff0abf2013-06-07 20:37:48 +0000158 setOperationAction(ISD::MULHU, MVT::i64, Expand);
159 setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
160 setOperationAction(ISD::MULHS, MVT::i64, Expand);
161 setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
162 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
163 setOperationAction(ISD::SREM, MVT::v2i64, Expand);
164 setOperationAction(ISD::Constant , MVT::i64 , Legal);
165 setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
166 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
167 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
168 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
169 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
170 if (STM.hasHWFP64()) {
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000171 // we support loading/storing v2f64 but not operations on the type
172 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
173 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
174 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
175 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
176 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
177 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
178 // We want to expand vector conversions into their scalar
179 // counterparts.
180 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
181 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
182 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
183 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
184 setOperationAction(ISD::FABS, MVT::f64, Expand);
185 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
186 }
187 // TODO: Fix the UDIV24 algorithm so it works for these
188 // types correctly. This needs vector comparisons
189 // for this to work correctly.
190 setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
191 setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
192 setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
193 setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
194 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
195 setOperationAction(ISD::SUBC, MVT::Other, Expand);
196 setOperationAction(ISD::ADDE, MVT::Other, Expand);
197 setOperationAction(ISD::ADDC, MVT::Other, Expand);
198 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
199 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
200 setOperationAction(ISD::BRIND, MVT::Other, Expand);
201 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
202
203
204 // Use the default implementation.
205 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
206 setOperationAction(ISD::Constant , MVT::i32 , Legal);
207
208 setSchedulingPreference(Sched::RegPressure);
209 setPow2DivIsCheap(false);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000210 setSelectIsExpensive(true);
211 setJumpIsExpensive(true);
212
Jim Grosbach64f3e762013-02-20 21:31:28 +0000213 MaxStoresPerMemcpy = 4096;
214 MaxStoresPerMemmove = 4096;
215 MaxStoresPerMemset = 4096;
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000216
217}
218
219bool
220AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
221 const CallInst &I, unsigned Intrinsic) const {
222 return false;
223}
224
225// The backend supports 32 and 64 bit floating point immediates
226bool
227AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
228 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
229 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
230 return true;
231 } else {
232 return false;
233 }
234}
235
236bool
237AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
238 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
239 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
240 return false;
241 } else {
242 return true;
243 }
244}
245
246
247// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
248// be zero. Op is expected to be a target specific node. Used by DAG
249// combiner.
250
251void
252AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
253 const SDValue Op,
254 APInt &KnownZero,
255 APInt &KnownOne,
256 const SelectionDAG &DAG,
257 unsigned Depth) const {
258 APInt KnownZero2;
259 APInt KnownOne2;
260 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
261 switch (Op.getOpcode()) {
262 default: break;
263 case ISD::SELECT_CC:
264 DAG.ComputeMaskedBits(
265 Op.getOperand(1),
266 KnownZero,
267 KnownOne,
268 Depth + 1
269 );
270 DAG.ComputeMaskedBits(
271 Op.getOperand(0),
272 KnownZero2,
273 KnownOne2
274 );
275 assert((KnownZero & KnownOne) == 0
276 && "Bits known to be one AND zero?");
277 assert((KnownZero2 & KnownOne2) == 0
278 && "Bits known to be one AND zero?");
279 // Only known if known in both the LHS and RHS
280 KnownOne &= KnownOne2;
281 KnownZero &= KnownZero2;
282 break;
283 };
284}
285
286//===----------------------------------------------------------------------===//
287// Other Lowering Hooks
288//===----------------------------------------------------------------------===//
289
290SDValue
291AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
292 EVT OVT = Op.getValueType();
293 SDValue DST;
294 if (OVT.getScalarType() == MVT::i64) {
295 DST = LowerSDIV64(Op, DAG);
296 } else if (OVT.getScalarType() == MVT::i32) {
297 DST = LowerSDIV32(Op, DAG);
298 } else if (OVT.getScalarType() == MVT::i16
299 || OVT.getScalarType() == MVT::i8) {
300 DST = LowerSDIV24(Op, DAG);
301 } else {
302 DST = SDValue(Op.getNode(), 0);
303 }
304 return DST;
305}
306
307SDValue
308AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
309 EVT OVT = Op.getValueType();
310 SDValue DST;
311 if (OVT.getScalarType() == MVT::i64) {
312 DST = LowerSREM64(Op, DAG);
313 } else if (OVT.getScalarType() == MVT::i32) {
314 DST = LowerSREM32(Op, DAG);
315 } else if (OVT.getScalarType() == MVT::i16) {
316 DST = LowerSREM16(Op, DAG);
317 } else if (OVT.getScalarType() == MVT::i8) {
318 DST = LowerSREM8(Op, DAG);
319 } else {
320 DST = SDValue(Op.getNode(), 0);
321 }
322 return DST;
323}
324
325SDValue
326AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
327 SDValue Data = Op.getOperand(0);
328 VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
Andrew Trickac6d9be2013-05-25 02:42:55 +0000329 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000330 EVT DVT = Data.getValueType();
331 EVT BVT = BaseType->getVT();
332 unsigned baseBits = BVT.getScalarType().getSizeInBits();
333 unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
334 unsigned shiftBits = srcBits - baseBits;
335 if (srcBits < 32) {
336 // If the op is less than 32 bits, then it needs to extend to 32bits
337 // so it can properly keep the upper bits valid.
338 EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
339 Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
340 shiftBits = 32 - baseBits;
341 DVT = IVT;
342 }
343 SDValue Shift = DAG.getConstant(shiftBits, DVT);
344 // Shift left by 'Shift' bits.
345 Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
346 // Signed shift Right by 'Shift' bits.
347 Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
348 if (srcBits < 32) {
349 // Once the sign extension is done, the op needs to be converted to
350 // its original type.
351 Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
352 }
353 return Data;
354}
355EVT
356AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
357 int iSize = (size * numEle);
358 int vEle = (iSize >> ((size == 64) ? 6 : 5));
359 if (!vEle) {
360 vEle = 1;
361 }
362 if (size == 64) {
363 if (vEle == 1) {
364 return EVT(MVT::i64);
365 } else {
366 return EVT(MVT::getVectorVT(MVT::i64, vEle));
367 }
368 } else {
369 if (vEle == 1) {
370 return EVT(MVT::i32);
371 } else {
372 return EVT(MVT::getVectorVT(MVT::i32, vEle));
373 }
374 }
375}
376
377SDValue
378AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
379 SDValue Chain = Op.getOperand(0);
380 SDValue Cond = Op.getOperand(1);
381 SDValue Jump = Op.getOperand(2);
382 SDValue Result;
383 Result = DAG.getNode(
384 AMDGPUISD::BRANCH_COND,
Andrew Trickac6d9be2013-05-25 02:42:55 +0000385 SDLoc(Op),
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000386 Op.getValueType(),
387 Chain, Jump, Cond);
388 return Result;
389}
390
391SDValue
392AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000393 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000394 EVT OVT = Op.getValueType();
395 SDValue LHS = Op.getOperand(0);
396 SDValue RHS = Op.getOperand(1);
397 MVT INTTY;
398 MVT FLTTY;
399 if (!OVT.isVector()) {
400 INTTY = MVT::i32;
401 FLTTY = MVT::f32;
402 } else if (OVT.getVectorNumElements() == 2) {
403 INTTY = MVT::v2i32;
404 FLTTY = MVT::v2f32;
405 } else if (OVT.getVectorNumElements() == 4) {
406 INTTY = MVT::v4i32;
407 FLTTY = MVT::v4f32;
408 }
409 unsigned bitsize = OVT.getScalarType().getSizeInBits();
410 // char|short jq = ia ^ ib;
411 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
412
413 // jq = jq >> (bitsize - 2)
414 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
415
416 // jq = jq | 0x1
417 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
418
419 // jq = (int)jq
420 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
421
422 // int ia = (int)LHS;
423 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
424
425 // int ib, (int)RHS;
426 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
427
428 // float fa = (float)ia;
429 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
430
431 // float fb = (float)ib;
432 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
433
434 // float fq = native_divide(fa, fb);
435 SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
436
437 // fq = trunc(fq);
438 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
439
440 // float fqneg = -fq;
441 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
442
443 // float fr = mad(fqneg, fb, fa);
Vincent Lejeunee3111962013-02-18 14:11:28 +0000444 SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
445 DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000446
447 // int iq = (int)fq;
448 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
449
450 // fr = fabs(fr);
451 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
452
453 // fb = fabs(fb);
454 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
455
456 // int cv = fr >= fb;
457 SDValue cv;
458 if (INTTY == MVT::i32) {
459 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
460 } else {
461 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
462 }
463 // jq = (cv ? jq : 0);
464 jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
465 DAG.getConstant(0, OVT));
466 // dst = iq + jq;
467 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
468 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
469 return iq;
470}
471
472SDValue
473AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000474 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000475 EVT OVT = Op.getValueType();
476 SDValue LHS = Op.getOperand(0);
477 SDValue RHS = Op.getOperand(1);
478 // The LowerSDIV32 function generates equivalent to the following IL.
479 // mov r0, LHS
480 // mov r1, RHS
481 // ilt r10, r0, 0
482 // ilt r11, r1, 0
483 // iadd r0, r0, r10
484 // iadd r1, r1, r11
485 // ixor r0, r0, r10
486 // ixor r1, r1, r11
487 // udiv r0, r0, r1
488 // ixor r10, r10, r11
489 // iadd r0, r0, r10
490 // ixor DST, r0, r10
491
492 // mov r0, LHS
493 SDValue r0 = LHS;
494
495 // mov r1, RHS
496 SDValue r1 = RHS;
497
498 // ilt r10, r0, 0
499 SDValue r10 = DAG.getSelectCC(DL,
500 r0, DAG.getConstant(0, OVT),
501 DAG.getConstant(-1, MVT::i32),
502 DAG.getConstant(0, MVT::i32),
503 ISD::SETLT);
504
505 // ilt r11, r1, 0
506 SDValue r11 = DAG.getSelectCC(DL,
507 r1, DAG.getConstant(0, OVT),
508 DAG.getConstant(-1, MVT::i32),
509 DAG.getConstant(0, MVT::i32),
510 ISD::SETLT);
511
512 // iadd r0, r0, r10
513 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
514
515 // iadd r1, r1, r11
516 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
517
518 // ixor r0, r0, r10
519 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
520
521 // ixor r1, r1, r11
522 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
523
524 // udiv r0, r0, r1
525 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
526
527 // ixor r10, r10, r11
528 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
529
530 // iadd r0, r0, r10
531 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
532
533 // ixor DST, r0, r10
534 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
535 return DST;
536}
537
538SDValue
539AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
540 return SDValue(Op.getNode(), 0);
541}
542
543SDValue
544AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000545 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000546 EVT OVT = Op.getValueType();
547 MVT INTTY = MVT::i32;
548 if (OVT == MVT::v2i8) {
549 INTTY = MVT::v2i32;
550 } else if (OVT == MVT::v4i8) {
551 INTTY = MVT::v4i32;
552 }
553 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
554 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
555 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
556 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
557 return LHS;
558}
559
560SDValue
561AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000562 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000563 EVT OVT = Op.getValueType();
564 MVT INTTY = MVT::i32;
565 if (OVT == MVT::v2i16) {
566 INTTY = MVT::v2i32;
567 } else if (OVT == MVT::v4i16) {
568 INTTY = MVT::v4i32;
569 }
570 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
571 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
572 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
573 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
574 return LHS;
575}
576
577SDValue
578AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000579 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000580 EVT OVT = Op.getValueType();
581 SDValue LHS = Op.getOperand(0);
582 SDValue RHS = Op.getOperand(1);
583 // The LowerSREM32 function generates equivalent to the following IL.
584 // mov r0, LHS
585 // mov r1, RHS
586 // ilt r10, r0, 0
587 // ilt r11, r1, 0
588 // iadd r0, r0, r10
589 // iadd r1, r1, r11
590 // ixor r0, r0, r10
591 // ixor r1, r1, r11
592 // udiv r20, r0, r1
593 // umul r20, r20, r1
594 // sub r0, r0, r20
595 // iadd r0, r0, r10
596 // ixor DST, r0, r10
597
598 // mov r0, LHS
599 SDValue r0 = LHS;
600
601 // mov r1, RHS
602 SDValue r1 = RHS;
603
604 // ilt r10, r0, 0
605 SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
606
607 // ilt r11, r1, 0
608 SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
609
610 // iadd r0, r0, r10
611 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
612
613 // iadd r1, r1, r11
614 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
615
616 // ixor r0, r0, r10
617 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
618
619 // ixor r1, r1, r11
620 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
621
622 // udiv r20, r0, r1
623 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
624
625 // umul r20, r20, r1
626 r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
627
628 // sub r0, r0, r20
629 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
630
631 // iadd r0, r0, r10
632 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
633
634 // ixor DST, r0, r10
635 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
636 return DST;
637}
638
639SDValue
640AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
641 return SDValue(Op.getNode(), 0);
642}