blob: 7cea803f89871dbbff2a68aadaa67dbf868d92af [file] [log] [blame]
Tom Stellardf98f2ce2012-12-11 21:25:42 +00001//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief TargetLowering functions borrowed from AMDIL.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUISelLowering.h"
16#include "AMDGPURegisterInfo.h"
Chandler Carruth58a2cbe2013-01-02 10:22:59 +000017#include "AMDGPUSubtarget.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000018#include "AMDILIntrinsicInfo.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000019#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/CodeGen/PseudoSourceValue.h"
22#include "llvm/CodeGen/SelectionDAG.h"
23#include "llvm/CodeGen/SelectionDAGNodes.h"
24#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
Chandler Carruth0b8c9a82013-01-02 11:36:10 +000025#include "llvm/IR/CallingConv.h"
26#include "llvm/IR/DerivedTypes.h"
27#include "llvm/IR/Instructions.h"
28#include "llvm/IR/Intrinsics.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000029#include "llvm/Support/raw_ostream.h"
30#include "llvm/Target/TargetInstrInfo.h"
31#include "llvm/Target/TargetOptions.h"
32
33using namespace llvm;
34//===----------------------------------------------------------------------===//
Tom Stellardf98f2ce2012-12-11 21:25:42 +000035// TargetLowering Implementation Help Functions End
36//===----------------------------------------------------------------------===//
37
38//===----------------------------------------------------------------------===//
39// TargetLowering Class Implementation Begins
40//===----------------------------------------------------------------------===//
41void AMDGPUTargetLowering::InitAMDILLowering() {
Stephen Hinesdce4a402014-05-29 02:49:00 -070042 static const MVT::SimpleValueType types[] = {
43 MVT::i8,
44 MVT::i16,
45 MVT::i32,
46 MVT::f32,
47 MVT::f64,
48 MVT::i64,
49 MVT::v2i8,
50 MVT::v4i8,
51 MVT::v2i16,
52 MVT::v4i16,
53 MVT::v4f32,
54 MVT::v4i32,
55 MVT::v2f32,
56 MVT::v2i32,
57 MVT::v2f64,
58 MVT::v2i64
Tom Stellardf98f2ce2012-12-11 21:25:42 +000059 };
60
Stephen Hinesdce4a402014-05-29 02:49:00 -070061 static const MVT::SimpleValueType IntTypes[] = {
62 MVT::i8,
63 MVT::i16,
64 MVT::i32,
65 MVT::i64
Tom Stellardf98f2ce2012-12-11 21:25:42 +000066 };
67
Stephen Hinesdce4a402014-05-29 02:49:00 -070068 static const MVT::SimpleValueType FloatTypes[] = {
69 MVT::f32,
70 MVT::f64
Tom Stellardf98f2ce2012-12-11 21:25:42 +000071 };
72
Stephen Hinesdce4a402014-05-29 02:49:00 -070073 static const MVT::SimpleValueType VectorTypes[] = {
74 MVT::v2i8,
75 MVT::v4i8,
76 MVT::v2i16,
77 MVT::v4i16,
78 MVT::v4f32,
79 MVT::v4i32,
80 MVT::v2f32,
81 MVT::v2i32,
82 MVT::v2f64,
83 MVT::v2i64
Tom Stellardf98f2ce2012-12-11 21:25:42 +000084 };
Tom Stellardf98f2ce2012-12-11 21:25:42 +000085
86 const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
87 // These are the current register classes that are
88 // supported
89
Stephen Hinesdce4a402014-05-29 02:49:00 -070090 for (MVT VT : types) {
Tom Stellardf98f2ce2012-12-11 21:25:42 +000091 setOperationAction(ISD::SUBE, VT, Expand);
92 setOperationAction(ISD::SUBC, VT, Expand);
93 setOperationAction(ISD::ADDE, VT, Expand);
94 setOperationAction(ISD::ADDC, VT, Expand);
95 setOperationAction(ISD::BRCOND, VT, Custom);
96 setOperationAction(ISD::BR_JT, VT, Expand);
97 setOperationAction(ISD::BRIND, VT, Expand);
98 // TODO: Implement custom UREM/SREM routines
99 setOperationAction(ISD::SREM, VT, Expand);
100 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
101 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
102 if (VT != MVT::i64 && VT != MVT::v2i64) {
103 setOperationAction(ISD::SDIV, VT, Custom);
104 }
105 }
Stephen Hinesdce4a402014-05-29 02:49:00 -0700106 for (MVT VT : FloatTypes) {
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000107 // IL does not have these operations for floating point types
108 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
109 setOperationAction(ISD::SETOLT, VT, Expand);
110 setOperationAction(ISD::SETOGE, VT, Expand);
111 setOperationAction(ISD::SETOGT, VT, Expand);
112 setOperationAction(ISD::SETOLE, VT, Expand);
113 setOperationAction(ISD::SETULT, VT, Expand);
114 setOperationAction(ISD::SETUGE, VT, Expand);
115 setOperationAction(ISD::SETUGT, VT, Expand);
116 setOperationAction(ISD::SETULE, VT, Expand);
117 }
118
Stephen Hinesdce4a402014-05-29 02:49:00 -0700119 for (MVT VT : IntTypes) {
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000120 // GPU also does not have divrem function for signed or unsigned
121 setOperationAction(ISD::SDIVREM, VT, Expand);
122
123 // GPU does not have [S|U]MUL_LOHI functions as a single instruction
124 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
125 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
126
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000127 setOperationAction(ISD::BSWAP, VT, Expand);
128
129 // GPU doesn't have any counting operators
130 setOperationAction(ISD::CTPOP, VT, Expand);
131 setOperationAction(ISD::CTTZ, VT, Expand);
132 setOperationAction(ISD::CTLZ, VT, Expand);
133 }
134
Stephen Hinesdce4a402014-05-29 02:49:00 -0700135 for (MVT VT : VectorTypes) {
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000136 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
137 setOperationAction(ISD::SDIVREM, VT, Expand);
138 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
139 // setOperationAction(ISD::VSETCC, VT, Expand);
140 setOperationAction(ISD::SELECT_CC, VT, Expand);
141
142 }
Tom Stellard3ff0abf2013-06-07 20:37:48 +0000143 setOperationAction(ISD::MULHU, MVT::i64, Expand);
144 setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
145 setOperationAction(ISD::MULHS, MVT::i64, Expand);
146 setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
147 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
148 setOperationAction(ISD::SREM, MVT::v2i64, Expand);
149 setOperationAction(ISD::Constant , MVT::i64 , Legal);
150 setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
151 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
152 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
153 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
154 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
155 if (STM.hasHWFP64()) {
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000156 // we support loading/storing v2f64 but not operations on the type
157 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
158 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
159 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
160 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
161 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
162 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
163 // We want to expand vector conversions into their scalar
164 // counterparts.
165 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
166 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
167 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
168 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
169 setOperationAction(ISD::FABS, MVT::f64, Expand);
170 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
171 }
172 // TODO: Fix the UDIV24 algorithm so it works for these
173 // types correctly. This needs vector comparisons
174 // for this to work correctly.
175 setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
176 setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
177 setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
178 setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000179 setOperationAction(ISD::SUBC, MVT::Other, Expand);
180 setOperationAction(ISD::ADDE, MVT::Other, Expand);
181 setOperationAction(ISD::ADDC, MVT::Other, Expand);
182 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
183 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
184 setOperationAction(ISD::BRIND, MVT::Other, Expand);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000185
186
187 // Use the default implementation.
188 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
189 setOperationAction(ISD::Constant , MVT::i32 , Legal);
190
191 setSchedulingPreference(Sched::RegPressure);
192 setPow2DivIsCheap(false);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000193 setSelectIsExpensive(true);
194 setJumpIsExpensive(true);
195
Jim Grosbach64f3e762013-02-20 21:31:28 +0000196 MaxStoresPerMemcpy = 4096;
197 MaxStoresPerMemmove = 4096;
198 MaxStoresPerMemset = 4096;
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000199
200}
201
202bool
203AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
204 const CallInst &I, unsigned Intrinsic) const {
205 return false;
206}
207
208// The backend supports 32 and 64 bit floating point immediates
209bool
210AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
211 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
212 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
213 return true;
214 } else {
215 return false;
216 }
217}
218
219bool
220AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
221 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
222 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
223 return false;
224 } else {
225 return true;
226 }
227}
228
229
230// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
231// be zero. Op is expected to be a target specific node. Used by DAG
232// combiner.
233
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000234//===----------------------------------------------------------------------===//
235// Other Lowering Hooks
236//===----------------------------------------------------------------------===//
237
238SDValue
239AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
240 EVT OVT = Op.getValueType();
241 SDValue DST;
242 if (OVT.getScalarType() == MVT::i64) {
243 DST = LowerSDIV64(Op, DAG);
244 } else if (OVT.getScalarType() == MVT::i32) {
245 DST = LowerSDIV32(Op, DAG);
246 } else if (OVT.getScalarType() == MVT::i16
247 || OVT.getScalarType() == MVT::i8) {
248 DST = LowerSDIV24(Op, DAG);
249 } else {
250 DST = SDValue(Op.getNode(), 0);
251 }
252 return DST;
253}
254
255SDValue
256AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
257 EVT OVT = Op.getValueType();
258 SDValue DST;
259 if (OVT.getScalarType() == MVT::i64) {
260 DST = LowerSREM64(Op, DAG);
261 } else if (OVT.getScalarType() == MVT::i32) {
262 DST = LowerSREM32(Op, DAG);
263 } else if (OVT.getScalarType() == MVT::i16) {
264 DST = LowerSREM16(Op, DAG);
265 } else if (OVT.getScalarType() == MVT::i8) {
266 DST = LowerSREM8(Op, DAG);
267 } else {
268 DST = SDValue(Op.getNode(), 0);
269 }
270 return DST;
271}
272
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000273EVT
274AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
275 int iSize = (size * numEle);
276 int vEle = (iSize >> ((size == 64) ? 6 : 5));
277 if (!vEle) {
278 vEle = 1;
279 }
280 if (size == 64) {
281 if (vEle == 1) {
282 return EVT(MVT::i64);
283 } else {
284 return EVT(MVT::getVectorVT(MVT::i64, vEle));
285 }
286 } else {
287 if (vEle == 1) {
288 return EVT(MVT::i32);
289 } else {
290 return EVT(MVT::getVectorVT(MVT::i32, vEle));
291 }
292 }
293}
294
295SDValue
296AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
297 SDValue Chain = Op.getOperand(0);
298 SDValue Cond = Op.getOperand(1);
299 SDValue Jump = Op.getOperand(2);
300 SDValue Result;
301 Result = DAG.getNode(
302 AMDGPUISD::BRANCH_COND,
Andrew Trickac6d9be2013-05-25 02:42:55 +0000303 SDLoc(Op),
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000304 Op.getValueType(),
305 Chain, Jump, Cond);
306 return Result;
307}
308
309SDValue
310AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000311 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000312 EVT OVT = Op.getValueType();
313 SDValue LHS = Op.getOperand(0);
314 SDValue RHS = Op.getOperand(1);
315 MVT INTTY;
316 MVT FLTTY;
317 if (!OVT.isVector()) {
318 INTTY = MVT::i32;
319 FLTTY = MVT::f32;
320 } else if (OVT.getVectorNumElements() == 2) {
321 INTTY = MVT::v2i32;
322 FLTTY = MVT::v2f32;
323 } else if (OVT.getVectorNumElements() == 4) {
324 INTTY = MVT::v4i32;
325 FLTTY = MVT::v4f32;
326 }
327 unsigned bitsize = OVT.getScalarType().getSizeInBits();
328 // char|short jq = ia ^ ib;
329 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
330
331 // jq = jq >> (bitsize - 2)
332 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
333
334 // jq = jq | 0x1
335 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
336
337 // jq = (int)jq
338 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
339
340 // int ia = (int)LHS;
341 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
342
343 // int ib, (int)RHS;
344 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
345
346 // float fa = (float)ia;
347 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
348
349 // float fb = (float)ib;
350 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
351
352 // float fq = native_divide(fa, fb);
353 SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
354
355 // fq = trunc(fq);
356 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
357
358 // float fqneg = -fq;
359 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
360
361 // float fr = mad(fqneg, fb, fa);
Vincent Lejeunee3111962013-02-18 14:11:28 +0000362 SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
363 DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000364
365 // int iq = (int)fq;
366 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
367
368 // fr = fabs(fr);
369 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
370
371 // fb = fabs(fb);
372 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
373
374 // int cv = fr >= fb;
375 SDValue cv;
376 if (INTTY == MVT::i32) {
377 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
378 } else {
379 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
380 }
381 // jq = (cv ? jq : 0);
382 jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
383 DAG.getConstant(0, OVT));
384 // dst = iq + jq;
385 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
386 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
387 return iq;
388}
389
390SDValue
391AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000392 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000393 EVT OVT = Op.getValueType();
394 SDValue LHS = Op.getOperand(0);
395 SDValue RHS = Op.getOperand(1);
396 // The LowerSDIV32 function generates equivalent to the following IL.
397 // mov r0, LHS
398 // mov r1, RHS
399 // ilt r10, r0, 0
400 // ilt r11, r1, 0
401 // iadd r0, r0, r10
402 // iadd r1, r1, r11
403 // ixor r0, r0, r10
404 // ixor r1, r1, r11
405 // udiv r0, r0, r1
406 // ixor r10, r10, r11
407 // iadd r0, r0, r10
408 // ixor DST, r0, r10
409
410 // mov r0, LHS
411 SDValue r0 = LHS;
412
413 // mov r1, RHS
414 SDValue r1 = RHS;
415
416 // ilt r10, r0, 0
417 SDValue r10 = DAG.getSelectCC(DL,
418 r0, DAG.getConstant(0, OVT),
419 DAG.getConstant(-1, MVT::i32),
420 DAG.getConstant(0, MVT::i32),
421 ISD::SETLT);
422
423 // ilt r11, r1, 0
424 SDValue r11 = DAG.getSelectCC(DL,
425 r1, DAG.getConstant(0, OVT),
426 DAG.getConstant(-1, MVT::i32),
427 DAG.getConstant(0, MVT::i32),
428 ISD::SETLT);
429
430 // iadd r0, r0, r10
431 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
432
433 // iadd r1, r1, r11
434 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
435
436 // ixor r0, r0, r10
437 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
438
439 // ixor r1, r1, r11
440 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
441
442 // udiv r0, r0, r1
443 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
444
445 // ixor r10, r10, r11
446 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
447
448 // iadd r0, r0, r10
449 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
450
451 // ixor DST, r0, r10
452 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
453 return DST;
454}
455
456SDValue
457AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
458 return SDValue(Op.getNode(), 0);
459}
460
461SDValue
462AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000463 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000464 EVT OVT = Op.getValueType();
465 MVT INTTY = MVT::i32;
466 if (OVT == MVT::v2i8) {
467 INTTY = MVT::v2i32;
468 } else if (OVT == MVT::v4i8) {
469 INTTY = MVT::v4i32;
470 }
471 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
472 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
473 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
474 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
475 return LHS;
476}
477
478SDValue
479AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000480 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000481 EVT OVT = Op.getValueType();
482 MVT INTTY = MVT::i32;
483 if (OVT == MVT::v2i16) {
484 INTTY = MVT::v2i32;
485 } else if (OVT == MVT::v4i16) {
486 INTTY = MVT::v4i32;
487 }
488 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
489 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
490 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
491 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
492 return LHS;
493}
494
495SDValue
496AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000497 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000498 EVT OVT = Op.getValueType();
499 SDValue LHS = Op.getOperand(0);
500 SDValue RHS = Op.getOperand(1);
501 // The LowerSREM32 function generates equivalent to the following IL.
502 // mov r0, LHS
503 // mov r1, RHS
504 // ilt r10, r0, 0
505 // ilt r11, r1, 0
506 // iadd r0, r0, r10
507 // iadd r1, r1, r11
508 // ixor r0, r0, r10
509 // ixor r1, r1, r11
510 // udiv r20, r0, r1
511 // umul r20, r20, r1
512 // sub r0, r0, r20
513 // iadd r0, r0, r10
514 // ixor DST, r0, r10
515
516 // mov r0, LHS
517 SDValue r0 = LHS;
518
519 // mov r1, RHS
520 SDValue r1 = RHS;
521
522 // ilt r10, r0, 0
523 SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
524
525 // ilt r11, r1, 0
526 SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
527
528 // iadd r0, r0, r10
529 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
530
531 // iadd r1, r1, r11
532 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
533
534 // ixor r0, r0, r10
535 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
536
537 // ixor r1, r1, r11
538 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
539
540 // udiv r20, r0, r1
541 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
542
543 // umul r20, r20, r1
544 r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
545
546 // sub r0, r0, r20
547 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
548
549 // iadd r0, r0, r10
550 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
551
552 // ixor DST, r0, r10
553 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
554 return DST;
555}
556
557SDValue
558AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
559 return SDValue(Op.getNode(), 0);
560}