blob: 2e60adcc99faf018c4435ad62539f57916b57e70 [file] [log] [blame]
Tom Stellardf98f2ce2012-12-11 21:25:42 +00001//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief TargetLowering functions borrowed from AMDIL.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUISelLowering.h"
16#include "AMDGPURegisterInfo.h"
Chandler Carruth58a2cbe2013-01-02 10:22:59 +000017#include "AMDGPUSubtarget.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000018#include "AMDILDevices.h"
19#include "AMDILIntrinsicInfo.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22#include "llvm/CodeGen/PseudoSourceValue.h"
23#include "llvm/CodeGen/SelectionDAG.h"
24#include "llvm/CodeGen/SelectionDAGNodes.h"
25#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
Chandler Carruth0b8c9a82013-01-02 11:36:10 +000026#include "llvm/IR/CallingConv.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/Instructions.h"
29#include "llvm/IR/Intrinsics.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000030#include "llvm/Support/raw_ostream.h"
31#include "llvm/Target/TargetInstrInfo.h"
32#include "llvm/Target/TargetOptions.h"
33
34using namespace llvm;
35//===----------------------------------------------------------------------===//
36// Calling Convention Implementation
37//===----------------------------------------------------------------------===//
38#include "AMDGPUGenCallingConv.inc"
39
40//===----------------------------------------------------------------------===//
41// TargetLowering Implementation Help Functions End
42//===----------------------------------------------------------------------===//
43
44//===----------------------------------------------------------------------===//
45// TargetLowering Class Implementation Begins
46//===----------------------------------------------------------------------===//
/// InitAMDILLowering - Configure the operation-legality tables inherited
/// from the AMDIL backend: which ISD operations are Custom-lowered,
/// Expanded, or Legal for each value type this target handles.
void AMDGPUTargetLowering::InitAMDILLowering() {
  // Every value type configured below: scalars plus the 2/4-element vectors.
  int types[] = {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  // Scalar integer types only.
  int IntTypes[] = {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  // Scalar floating-point types only.
  int FloatTypes[] = {
    (int)MVT::f32,
    (int)MVT::f64
  };

  // All vector types (integer and floating point).
  int VectorTypes[] = {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t NumTypes = sizeof(types) / sizeof(*types);
  size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  // These are the current register classes that are
  // supported

  // Actions applied uniformly to every supported type.
  for (unsigned int x = 0; x < NumTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    // 64-bit SDIV has no custom expansion; everything narrower does
    // (see LowerSDIV).
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  // Actions specific to floating-point types.
  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  // Actions specific to scalar integer types.
  for (unsigned int x = 0; x < NumIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // GPU also does not have divrem function for signed or unsigned
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // GPU doesn't have a rotl, rotr, or byteswap instruction
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  // Actions specific to vector types.
  for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);

  }
  // 64-bit integer support is an optional device capability.
  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant , MVT::i64 , Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  // Double-precision support is likewise optional per device.
  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
    // we support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Use the default implementation.
  setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
  setOperationAction(ISD::Constant , MVT::i32 , Legal);

  // Scheduling / cost-model knobs inherited from AMDIL.
  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;

}
228
229bool
230AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
231 const CallInst &I, unsigned Intrinsic) const {
232 return false;
233}
234
235// The backend supports 32 and 64 bit floating point immediates
236bool
237AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
238 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
239 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
240 return true;
241 } else {
242 return false;
243 }
244}
245
246bool
247AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
248 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
249 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
250 return false;
251 } else {
252 return true;
253 }
254}
255
256
257// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
258// be zero. Op is expected to be a target specific node. Used by DAG
259// combiner.
260
261void
262AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
263 const SDValue Op,
264 APInt &KnownZero,
265 APInt &KnownOne,
266 const SelectionDAG &DAG,
267 unsigned Depth) const {
268 APInt KnownZero2;
269 APInt KnownOne2;
270 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
271 switch (Op.getOpcode()) {
272 default: break;
273 case ISD::SELECT_CC:
274 DAG.ComputeMaskedBits(
275 Op.getOperand(1),
276 KnownZero,
277 KnownOne,
278 Depth + 1
279 );
280 DAG.ComputeMaskedBits(
281 Op.getOperand(0),
282 KnownZero2,
283 KnownOne2
284 );
285 assert((KnownZero & KnownOne) == 0
286 && "Bits known to be one AND zero?");
287 assert((KnownZero2 & KnownOne2) == 0
288 && "Bits known to be one AND zero?");
289 // Only known if known in both the LHS and RHS
290 KnownOne &= KnownOne2;
291 KnownZero &= KnownZero2;
292 break;
293 };
294}
295
296//===----------------------------------------------------------------------===//
297// Other Lowering Hooks
298//===----------------------------------------------------------------------===//
299
300SDValue
301AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
302 EVT OVT = Op.getValueType();
303 SDValue DST;
304 if (OVT.getScalarType() == MVT::i64) {
305 DST = LowerSDIV64(Op, DAG);
306 } else if (OVT.getScalarType() == MVT::i32) {
307 DST = LowerSDIV32(Op, DAG);
308 } else if (OVT.getScalarType() == MVT::i16
309 || OVT.getScalarType() == MVT::i8) {
310 DST = LowerSDIV24(Op, DAG);
311 } else {
312 DST = SDValue(Op.getNode(), 0);
313 }
314 return DST;
315}
316
317SDValue
318AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
319 EVT OVT = Op.getValueType();
320 SDValue DST;
321 if (OVT.getScalarType() == MVT::i64) {
322 DST = LowerSREM64(Op, DAG);
323 } else if (OVT.getScalarType() == MVT::i32) {
324 DST = LowerSREM32(Op, DAG);
325 } else if (OVT.getScalarType() == MVT::i16) {
326 DST = LowerSREM16(Op, DAG);
327 } else if (OVT.getScalarType() == MVT::i8) {
328 DST = LowerSREM8(Op, DAG);
329 } else {
330 DST = SDValue(Op.getNode(), 0);
331 }
332 return DST;
333}
334
335SDValue
336AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
337 SDValue Data = Op.getOperand(0);
338 VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
339 DebugLoc DL = Op.getDebugLoc();
340 EVT DVT = Data.getValueType();
341 EVT BVT = BaseType->getVT();
342 unsigned baseBits = BVT.getScalarType().getSizeInBits();
343 unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
344 unsigned shiftBits = srcBits - baseBits;
345 if (srcBits < 32) {
346 // If the op is less than 32 bits, then it needs to extend to 32bits
347 // so it can properly keep the upper bits valid.
348 EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
349 Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
350 shiftBits = 32 - baseBits;
351 DVT = IVT;
352 }
353 SDValue Shift = DAG.getConstant(shiftBits, DVT);
354 // Shift left by 'Shift' bits.
355 Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
356 // Signed shift Right by 'Shift' bits.
357 Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
358 if (srcBits < 32) {
359 // Once the sign extension is done, the op needs to be converted to
360 // its original type.
361 Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
362 }
363 return Data;
364}
365EVT
366AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
367 int iSize = (size * numEle);
368 int vEle = (iSize >> ((size == 64) ? 6 : 5));
369 if (!vEle) {
370 vEle = 1;
371 }
372 if (size == 64) {
373 if (vEle == 1) {
374 return EVT(MVT::i64);
375 } else {
376 return EVT(MVT::getVectorVT(MVT::i64, vEle));
377 }
378 } else {
379 if (vEle == 1) {
380 return EVT(MVT::i32);
381 } else {
382 return EVT(MVT::getVectorVT(MVT::i32, vEle));
383 }
384 }
385}
386
387SDValue
388AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
389 SDValue Chain = Op.getOperand(0);
390 SDValue Cond = Op.getOperand(1);
391 SDValue Jump = Op.getOperand(2);
392 SDValue Result;
393 Result = DAG.getNode(
394 AMDGPUISD::BRANCH_COND,
395 Op.getDebugLoc(),
396 Op.getValueType(),
397 Chain, Jump, Cond);
398 return Result;
399}
400
401SDValue
402AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
403 DebugLoc DL = Op.getDebugLoc();
404 EVT OVT = Op.getValueType();
405 SDValue LHS = Op.getOperand(0);
406 SDValue RHS = Op.getOperand(1);
407 MVT INTTY;
408 MVT FLTTY;
409 if (!OVT.isVector()) {
410 INTTY = MVT::i32;
411 FLTTY = MVT::f32;
412 } else if (OVT.getVectorNumElements() == 2) {
413 INTTY = MVT::v2i32;
414 FLTTY = MVT::v2f32;
415 } else if (OVT.getVectorNumElements() == 4) {
416 INTTY = MVT::v4i32;
417 FLTTY = MVT::v4f32;
418 }
419 unsigned bitsize = OVT.getScalarType().getSizeInBits();
420 // char|short jq = ia ^ ib;
421 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
422
423 // jq = jq >> (bitsize - 2)
424 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
425
426 // jq = jq | 0x1
427 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
428
429 // jq = (int)jq
430 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
431
432 // int ia = (int)LHS;
433 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
434
435 // int ib, (int)RHS;
436 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
437
438 // float fa = (float)ia;
439 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
440
441 // float fb = (float)ib;
442 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
443
444 // float fq = native_divide(fa, fb);
445 SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
446
447 // fq = trunc(fq);
448 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
449
450 // float fqneg = -fq;
451 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
452
453 // float fr = mad(fqneg, fb, fa);
454 SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
455
456 // int iq = (int)fq;
457 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
458
459 // fr = fabs(fr);
460 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
461
462 // fb = fabs(fb);
463 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
464
465 // int cv = fr >= fb;
466 SDValue cv;
467 if (INTTY == MVT::i32) {
468 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
469 } else {
470 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
471 }
472 // jq = (cv ? jq : 0);
473 jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
474 DAG.getConstant(0, OVT));
475 // dst = iq + jq;
476 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
477 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
478 return iq;
479}
480
481SDValue
482AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
483 DebugLoc DL = Op.getDebugLoc();
484 EVT OVT = Op.getValueType();
485 SDValue LHS = Op.getOperand(0);
486 SDValue RHS = Op.getOperand(1);
487 // The LowerSDIV32 function generates equivalent to the following IL.
488 // mov r0, LHS
489 // mov r1, RHS
490 // ilt r10, r0, 0
491 // ilt r11, r1, 0
492 // iadd r0, r0, r10
493 // iadd r1, r1, r11
494 // ixor r0, r0, r10
495 // ixor r1, r1, r11
496 // udiv r0, r0, r1
497 // ixor r10, r10, r11
498 // iadd r0, r0, r10
499 // ixor DST, r0, r10
500
501 // mov r0, LHS
502 SDValue r0 = LHS;
503
504 // mov r1, RHS
505 SDValue r1 = RHS;
506
507 // ilt r10, r0, 0
508 SDValue r10 = DAG.getSelectCC(DL,
509 r0, DAG.getConstant(0, OVT),
510 DAG.getConstant(-1, MVT::i32),
511 DAG.getConstant(0, MVT::i32),
512 ISD::SETLT);
513
514 // ilt r11, r1, 0
515 SDValue r11 = DAG.getSelectCC(DL,
516 r1, DAG.getConstant(0, OVT),
517 DAG.getConstant(-1, MVT::i32),
518 DAG.getConstant(0, MVT::i32),
519 ISD::SETLT);
520
521 // iadd r0, r0, r10
522 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
523
524 // iadd r1, r1, r11
525 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
526
527 // ixor r0, r0, r10
528 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
529
530 // ixor r1, r1, r11
531 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
532
533 // udiv r0, r0, r1
534 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
535
536 // ixor r10, r10, r11
537 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
538
539 // iadd r0, r0, r10
540 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
541
542 // ixor DST, r0, r10
543 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
544 return DST;
545}
546
547SDValue
548AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
549 return SDValue(Op.getNode(), 0);
550}
551
552SDValue
553AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
554 DebugLoc DL = Op.getDebugLoc();
555 EVT OVT = Op.getValueType();
556 MVT INTTY = MVT::i32;
557 if (OVT == MVT::v2i8) {
558 INTTY = MVT::v2i32;
559 } else if (OVT == MVT::v4i8) {
560 INTTY = MVT::v4i32;
561 }
562 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
563 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
564 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
565 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
566 return LHS;
567}
568
569SDValue
570AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
571 DebugLoc DL = Op.getDebugLoc();
572 EVT OVT = Op.getValueType();
573 MVT INTTY = MVT::i32;
574 if (OVT == MVT::v2i16) {
575 INTTY = MVT::v2i32;
576 } else if (OVT == MVT::v4i16) {
577 INTTY = MVT::v4i32;
578 }
579 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
580 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
581 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
582 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
583 return LHS;
584}
585
586SDValue
587AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
588 DebugLoc DL = Op.getDebugLoc();
589 EVT OVT = Op.getValueType();
590 SDValue LHS = Op.getOperand(0);
591 SDValue RHS = Op.getOperand(1);
592 // The LowerSREM32 function generates equivalent to the following IL.
593 // mov r0, LHS
594 // mov r1, RHS
595 // ilt r10, r0, 0
596 // ilt r11, r1, 0
597 // iadd r0, r0, r10
598 // iadd r1, r1, r11
599 // ixor r0, r0, r10
600 // ixor r1, r1, r11
601 // udiv r20, r0, r1
602 // umul r20, r20, r1
603 // sub r0, r0, r20
604 // iadd r0, r0, r10
605 // ixor DST, r0, r10
606
607 // mov r0, LHS
608 SDValue r0 = LHS;
609
610 // mov r1, RHS
611 SDValue r1 = RHS;
612
613 // ilt r10, r0, 0
614 SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
615
616 // ilt r11, r1, 0
617 SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
618
619 // iadd r0, r0, r10
620 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
621
622 // iadd r1, r1, r11
623 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
624
625 // ixor r0, r0, r10
626 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
627
628 // ixor r1, r1, r11
629 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
630
631 // udiv r20, r0, r1
632 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
633
634 // umul r20, r20, r1
635 r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
636
637 // sub r0, r0, r20
638 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
639
640 // iadd r0, r0, r10
641 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
642
643 // ixor DST, r0, r10
644 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
645 return DST;
646}
647
648SDValue
649AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
650 return SDValue(Op.getNode(), 0);
651}