blob: 0168906bf6d67670da1d15701dbb98690ad509f9 [file] [log] [blame]
Tom Stellardf98f2ce2012-12-11 21:25:42 +00001//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief TargetLowering functions borrowed from AMDIL.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUISelLowering.h"
16#include "AMDGPURegisterInfo.h"
Chandler Carruth58a2cbe2013-01-02 10:22:59 +000017#include "AMDGPUSubtarget.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000018#include "AMDILDevices.h"
19#include "AMDILIntrinsicInfo.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22#include "llvm/CodeGen/PseudoSourceValue.h"
23#include "llvm/CodeGen/SelectionDAG.h"
24#include "llvm/CodeGen/SelectionDAGNodes.h"
25#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
Chandler Carruth0b8c9a82013-01-02 11:36:10 +000026#include "llvm/IR/CallingConv.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/Instructions.h"
29#include "llvm/IR/Intrinsics.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000030#include "llvm/Support/raw_ostream.h"
31#include "llvm/Target/TargetInstrInfo.h"
32#include "llvm/Target/TargetOptions.h"
33
34using namespace llvm;
35//===----------------------------------------------------------------------===//
Tom Stellardf98f2ce2012-12-11 21:25:42 +000036// TargetLowering Implementation Help Functions End
37//===----------------------------------------------------------------------===//
38
39//===----------------------------------------------------------------------===//
40// TargetLowering Class Implementation Begins
41//===----------------------------------------------------------------------===//
/// Configure the AMDIL-inherited part of AMDGPU lowering: record, per value
/// type, which generic DAG operations are Legal, must be Expanded by the
/// legalizer, or are Custom-lowered by this target.
void AMDGPUTargetLowering::InitAMDILLowering() {
  // All value types configured below (scalars plus the supported vectors).
  int types[] = {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  // Scalar integer types only.
  int IntTypes[] = {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  // Scalar floating-point types only.
  int FloatTypes[] = {
    (int)MVT::f32,
    (int)MVT::f64
  };

  // All vector types (integer and floating point).
  int VectorTypes[] = {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t NumTypes = sizeof(types) / sizeof(*types);
  size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  // These are the current register classes that are
  // supported

  // Actions common to every type in the list (integer, float, and vector).
  for (unsigned int x = 0; x < NumTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    // 64-bit SDIV has no custom lowering (see LowerSDIV64).
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < NumIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // GPU also does not have divrem function for signed or unsigned
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);

  }
  // Extra restrictions when the device supports 64-bit integer ops.
  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant , MVT::i64 , Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
    // we support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Use the default implementation.
  setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
  setOperationAction(ISD::Constant , MVT::i32 , Legal);

  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  // Effectively unlimited: always inline memcpy/memmove/memset expansions
  // rather than calling a library routine (there is no libc on the GPU).
  MaxStoresPerMemcpy = 4096;
  MaxStoresPerMemmove = 4096;
  MaxStoresPerMemset = 4096;

}
221
/// No AMDGPU intrinsic currently needs to be modeled as a target memory
/// intrinsic, so never fill in Info.
bool
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const {
  return false;
}
227
228// The backend supports 32 and 64 bit floating point immediates
229bool
230AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
231 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
232 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
233 return true;
234 } else {
235 return false;
236 }
237}
238
239bool
240AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
241 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
242 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
243 return false;
244 } else {
245 return true;
246 }
247}
248
249
250// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
251// be zero. Op is expected to be a target specific node. Used by DAG
252// combiner.
253
254void
255AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
256 const SDValue Op,
257 APInt &KnownZero,
258 APInt &KnownOne,
259 const SelectionDAG &DAG,
260 unsigned Depth) const {
261 APInt KnownZero2;
262 APInt KnownOne2;
263 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
264 switch (Op.getOpcode()) {
265 default: break;
266 case ISD::SELECT_CC:
267 DAG.ComputeMaskedBits(
268 Op.getOperand(1),
269 KnownZero,
270 KnownOne,
271 Depth + 1
272 );
273 DAG.ComputeMaskedBits(
274 Op.getOperand(0),
275 KnownZero2,
276 KnownOne2
277 );
278 assert((KnownZero & KnownOne) == 0
279 && "Bits known to be one AND zero?");
280 assert((KnownZero2 & KnownOne2) == 0
281 && "Bits known to be one AND zero?");
282 // Only known if known in both the LHS and RHS
283 KnownOne &= KnownOne2;
284 KnownZero &= KnownZero2;
285 break;
286 };
287}
288
289//===----------------------------------------------------------------------===//
290// Other Lowering Hooks
291//===----------------------------------------------------------------------===//
292
293SDValue
294AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
295 EVT OVT = Op.getValueType();
296 SDValue DST;
297 if (OVT.getScalarType() == MVT::i64) {
298 DST = LowerSDIV64(Op, DAG);
299 } else if (OVT.getScalarType() == MVT::i32) {
300 DST = LowerSDIV32(Op, DAG);
301 } else if (OVT.getScalarType() == MVT::i16
302 || OVT.getScalarType() == MVT::i8) {
303 DST = LowerSDIV24(Op, DAG);
304 } else {
305 DST = SDValue(Op.getNode(), 0);
306 }
307 return DST;
308}
309
310SDValue
311AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
312 EVT OVT = Op.getValueType();
313 SDValue DST;
314 if (OVT.getScalarType() == MVT::i64) {
315 DST = LowerSREM64(Op, DAG);
316 } else if (OVT.getScalarType() == MVT::i32) {
317 DST = LowerSREM32(Op, DAG);
318 } else if (OVT.getScalarType() == MVT::i16) {
319 DST = LowerSREM16(Op, DAG);
320 } else if (OVT.getScalarType() == MVT::i8) {
321 DST = LowerSREM8(Op, DAG);
322 } else {
323 DST = SDValue(Op.getNode(), 0);
324 }
325 return DST;
326}
327
/// Lower SIGN_EXTEND_INREG as a shift pair: shifting the value left so the
/// inner type's sign bit becomes the top bit, then arithmetic-shifting back
/// down replicates that sign bit through the upper bits.
SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
  SDValue Data = Op.getOperand(0);
  // Operand 1 carries the type being extended *from*.
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  SDLoc DL(Op);
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  // Width of the inner (source) type.
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  // Width of the value itself; non-simple types are treated as 1 bit.
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    // Recompute the shift against the widened 32-bit elements.
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
358EVT
359AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
360 int iSize = (size * numEle);
361 int vEle = (iSize >> ((size == 64) ? 6 : 5));
362 if (!vEle) {
363 vEle = 1;
364 }
365 if (size == 64) {
366 if (vEle == 1) {
367 return EVT(MVT::i64);
368 } else {
369 return EVT(MVT::getVectorVT(MVT::i64, vEle));
370 }
371 } else {
372 if (vEle == 1) {
373 return EVT(MVT::i32);
374 } else {
375 return EVT(MVT::getVectorVT(MVT::i32, vEle));
376 }
377 }
378}
379
380SDValue
381AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
382 SDValue Chain = Op.getOperand(0);
383 SDValue Cond = Op.getOperand(1);
384 SDValue Jump = Op.getOperand(2);
385 SDValue Result;
386 Result = DAG.getNode(
387 AMDGPUISD::BRANCH_COND,
Andrew Trickac6d9be2013-05-25 02:42:55 +0000388 SDLoc(Op),
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000389 Op.getValueType(),
390 Chain, Jump, Cond);
391 return Result;
392}
393
394SDValue
395AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000396 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000397 EVT OVT = Op.getValueType();
398 SDValue LHS = Op.getOperand(0);
399 SDValue RHS = Op.getOperand(1);
400 MVT INTTY;
401 MVT FLTTY;
402 if (!OVT.isVector()) {
403 INTTY = MVT::i32;
404 FLTTY = MVT::f32;
405 } else if (OVT.getVectorNumElements() == 2) {
406 INTTY = MVT::v2i32;
407 FLTTY = MVT::v2f32;
408 } else if (OVT.getVectorNumElements() == 4) {
409 INTTY = MVT::v4i32;
410 FLTTY = MVT::v4f32;
411 }
412 unsigned bitsize = OVT.getScalarType().getSizeInBits();
413 // char|short jq = ia ^ ib;
414 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
415
416 // jq = jq >> (bitsize - 2)
417 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
418
419 // jq = jq | 0x1
420 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
421
422 // jq = (int)jq
423 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
424
425 // int ia = (int)LHS;
426 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
427
428 // int ib, (int)RHS;
429 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
430
431 // float fa = (float)ia;
432 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
433
434 // float fb = (float)ib;
435 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
436
437 // float fq = native_divide(fa, fb);
438 SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
439
440 // fq = trunc(fq);
441 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
442
443 // float fqneg = -fq;
444 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
445
446 // float fr = mad(fqneg, fb, fa);
Vincent Lejeunee3111962013-02-18 14:11:28 +0000447 SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
448 DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000449
450 // int iq = (int)fq;
451 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
452
453 // fr = fabs(fr);
454 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
455
456 // fb = fabs(fb);
457 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
458
459 // int cv = fr >= fb;
460 SDValue cv;
461 if (INTTY == MVT::i32) {
462 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
463 } else {
464 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
465 }
466 // jq = (cv ? jq : 0);
467 jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
468 DAG.getConstant(0, OVT));
469 // dst = iq + jq;
470 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
471 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
472 return iq;
473}
474
475SDValue
476AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000477 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000478 EVT OVT = Op.getValueType();
479 SDValue LHS = Op.getOperand(0);
480 SDValue RHS = Op.getOperand(1);
481 // The LowerSDIV32 function generates equivalent to the following IL.
482 // mov r0, LHS
483 // mov r1, RHS
484 // ilt r10, r0, 0
485 // ilt r11, r1, 0
486 // iadd r0, r0, r10
487 // iadd r1, r1, r11
488 // ixor r0, r0, r10
489 // ixor r1, r1, r11
490 // udiv r0, r0, r1
491 // ixor r10, r10, r11
492 // iadd r0, r0, r10
493 // ixor DST, r0, r10
494
495 // mov r0, LHS
496 SDValue r0 = LHS;
497
498 // mov r1, RHS
499 SDValue r1 = RHS;
500
501 // ilt r10, r0, 0
502 SDValue r10 = DAG.getSelectCC(DL,
503 r0, DAG.getConstant(0, OVT),
504 DAG.getConstant(-1, MVT::i32),
505 DAG.getConstant(0, MVT::i32),
506 ISD::SETLT);
507
508 // ilt r11, r1, 0
509 SDValue r11 = DAG.getSelectCC(DL,
510 r1, DAG.getConstant(0, OVT),
511 DAG.getConstant(-1, MVT::i32),
512 DAG.getConstant(0, MVT::i32),
513 ISD::SETLT);
514
515 // iadd r0, r0, r10
516 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
517
518 // iadd r1, r1, r11
519 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
520
521 // ixor r0, r0, r10
522 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
523
524 // ixor r1, r1, r11
525 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
526
527 // udiv r0, r0, r1
528 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
529
530 // ixor r10, r10, r11
531 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
532
533 // iadd r0, r0, r10
534 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
535
536 // ixor DST, r0, r10
537 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
538 return DST;
539}
540
541SDValue
542AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
543 return SDValue(Op.getNode(), 0);
544}
545
546SDValue
547AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000548 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000549 EVT OVT = Op.getValueType();
550 MVT INTTY = MVT::i32;
551 if (OVT == MVT::v2i8) {
552 INTTY = MVT::v2i32;
553 } else if (OVT == MVT::v4i8) {
554 INTTY = MVT::v4i32;
555 }
556 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
557 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
558 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
559 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
560 return LHS;
561}
562
563SDValue
564AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000565 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000566 EVT OVT = Op.getValueType();
567 MVT INTTY = MVT::i32;
568 if (OVT == MVT::v2i16) {
569 INTTY = MVT::v2i32;
570 } else if (OVT == MVT::v4i16) {
571 INTTY = MVT::v4i32;
572 }
573 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
574 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
575 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
576 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
577 return LHS;
578}
579
580SDValue
581AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000582 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000583 EVT OVT = Op.getValueType();
584 SDValue LHS = Op.getOperand(0);
585 SDValue RHS = Op.getOperand(1);
586 // The LowerSREM32 function generates equivalent to the following IL.
587 // mov r0, LHS
588 // mov r1, RHS
589 // ilt r10, r0, 0
590 // ilt r11, r1, 0
591 // iadd r0, r0, r10
592 // iadd r1, r1, r11
593 // ixor r0, r0, r10
594 // ixor r1, r1, r11
595 // udiv r20, r0, r1
596 // umul r20, r20, r1
597 // sub r0, r0, r20
598 // iadd r0, r0, r10
599 // ixor DST, r0, r10
600
601 // mov r0, LHS
602 SDValue r0 = LHS;
603
604 // mov r1, RHS
605 SDValue r1 = RHS;
606
607 // ilt r10, r0, 0
608 SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
609
610 // ilt r11, r1, 0
611 SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
612
613 // iadd r0, r0, r10
614 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
615
616 // iadd r1, r1, r11
617 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
618
619 // ixor r0, r0, r10
620 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
621
622 // ixor r1, r1, r11
623 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
624
625 // udiv r20, r0, r1
626 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
627
628 // umul r20, r20, r1
629 r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
630
631 // sub r0, r0, r20
632 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
633
634 // iadd r0, r0, r10
635 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
636
637 // ixor DST, r0, r10
638 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
639 return DST;
640}
641
642SDValue
643AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
644 return SDValue(Op.getNode(), 0);
645}