blob: 6d7359053dc0a434a92c50ded50f15483cd672e0 [file] [log] [blame]
Tom Stellardf98f2ce2012-12-11 21:25:42 +00001//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief This is the parent TargetLowering class for hardware code gen
12/// targets.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPUISelLowering.h"
Tom Stellarde7397ee2013-06-03 17:40:11 +000017#include "AMDGPU.h"
Christian Konig90c64cb2013-03-07 09:03:52 +000018#include "AMDGPURegisterInfo.h"
Christian Konig90c64cb2013-03-07 09:03:52 +000019#include "AMDGPUSubtarget.h"
Benjamin Kramer5c352902013-05-23 17:10:37 +000020#include "AMDILIntrinsicInfo.h"
Tom Stellarde7397ee2013-06-03 17:40:11 +000021#include "SIMachineFunctionInfo.h"
Christian Konig90c64cb2013-03-07 09:03:52 +000022#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000023#include "llvm/CodeGen/MachineFunction.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/CodeGen/SelectionDAG.h"
26#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
27
28using namespace llvm;
29
Christian Konig90c64cb2013-03-07 09:03:52 +000030#include "AMDGPUGenCallingConv.inc"
31
Tom Stellardf98f2ce2012-12-11 21:25:42 +000032AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
33 TargetLowering(TM, new TargetLoweringObjectFileELF()) {
34
35 // Initialize target lowering borrowed from AMDIL
36 InitAMDILLowering();
37
38 // We need to custom lower some of the intrinsics
39 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
40
41 // Library functions. These default to Expand, but we have instructions
42 // for them.
43 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
44 setOperationAction(ISD::FEXP2, MVT::f32, Legal);
45 setOperationAction(ISD::FPOW, MVT::f32, Legal);
46 setOperationAction(ISD::FLOG2, MVT::f32, Legal);
47 setOperationAction(ISD::FABS, MVT::f32, Legal);
48 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
49 setOperationAction(ISD::FRINT, MVT::f32, Legal);
50
Tom Stellardba534c22013-05-20 15:02:19 +000051 // The hardware supports ROTR, but not ROTL
52 setOperationAction(ISD::ROTL, MVT::i32, Expand);
53
Tom Stellardf98f2ce2012-12-11 21:25:42 +000054 // Lower floating point store/load to integer store/load to reduce the number
55 // of patterns in tablegen.
56 setOperationAction(ISD::STORE, MVT::f32, Promote);
57 AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
58
59 setOperationAction(ISD::STORE, MVT::v4f32, Promote);
60 AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
61
62 setOperationAction(ISD::LOAD, MVT::f32, Promote);
63 AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
64
65 setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
66 AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
67
Christian Konig45b14e32013-03-27 09:12:51 +000068 setOperationAction(ISD::MUL, MVT::i64, Expand);
69
Tom Stellardf98f2ce2012-12-11 21:25:42 +000070 setOperationAction(ISD::UDIV, MVT::i32, Expand);
71 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
72 setOperationAction(ISD::UREM, MVT::i32, Expand);
Aaron Watryf97c7fe2013-06-25 13:55:57 +000073
74 int types[] = {
75 (int)MVT::v2i32,
76 (int)MVT::v4i32
77 };
78 size_t NumTypes = sizeof(types) / sizeof(*types);
79
80 for (unsigned int x = 0; x < NumTypes; ++x) {
81 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
82 //Expand the following operations for the current type by default
83 setOperationAction(ISD::ADD, VT, Expand);
84 setOperationAction(ISD::AND, VT, Expand);
85 setOperationAction(ISD::MUL, VT, Expand);
86 setOperationAction(ISD::OR, VT, Expand);
87 setOperationAction(ISD::SHL, VT, Expand);
88 setOperationAction(ISD::SRL, VT, Expand);
89 setOperationAction(ISD::SRA, VT, Expand);
90 setOperationAction(ISD::SUB, VT, Expand);
91 setOperationAction(ISD::UDIV, VT, Expand);
92 setOperationAction(ISD::UREM, VT, Expand);
93 setOperationAction(ISD::XOR, VT, Expand);
94 }
Tom Stellardf98f2ce2012-12-11 21:25:42 +000095}
96
97//===---------------------------------------------------------------------===//
98// TargetLowering Callbacks
99//===---------------------------------------------------------------------===//
100
Christian Konig90c64cb2013-03-07 09:03:52 +0000101void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
102 const SmallVectorImpl<ISD::InputArg> &Ins) const {
103
104 State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000105}
106
107SDValue AMDGPUTargetLowering::LowerReturn(
108 SDValue Chain,
109 CallingConv::ID CallConv,
110 bool isVarArg,
111 const SmallVectorImpl<ISD::OutputArg> &Outs,
112 const SmallVectorImpl<SDValue> &OutVals,
Andrew Trickac6d9be2013-05-25 02:42:55 +0000113 SDLoc DL, SelectionDAG &DAG) const {
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000114 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
115}
116
117//===---------------------------------------------------------------------===//
118// Target specific lowering
119//===---------------------------------------------------------------------===//
120
121SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
122 const {
123 switch (Op.getOpcode()) {
124 default:
125 Op.getNode()->dump();
126 assert(0 && "Custom lowering code for this"
127 "instruction is not implemented yet!");
128 break;
129 // AMDIL DAG lowering
130 case ISD::SDIV: return LowerSDIV(Op, DAG);
131 case ISD::SREM: return LowerSREM(Op, DAG);
132 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
133 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
134 // AMDGPU DAG lowering
135 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
136 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
137 }
138 return Op;
139}
140
141SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
142 SelectionDAG &DAG) const {
143 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
Andrew Trickac6d9be2013-05-25 02:42:55 +0000144 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000145 EVT VT = Op.getValueType();
146
147 switch (IntrinsicID) {
148 default: return Op;
149 case AMDGPUIntrinsic::AMDIL_abs:
150 return LowerIntrinsicIABS(Op, DAG);
151 case AMDGPUIntrinsic::AMDIL_exp:
152 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
153 case AMDGPUIntrinsic::AMDGPU_lrp:
154 return LowerIntrinsicLRP(Op, DAG);
155 case AMDGPUIntrinsic::AMDIL_fraction:
156 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000157 case AMDGPUIntrinsic::AMDIL_max:
158 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
159 Op.getOperand(2));
160 case AMDGPUIntrinsic::AMDGPU_imax:
161 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
162 Op.getOperand(2));
163 case AMDGPUIntrinsic::AMDGPU_umax:
164 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
165 Op.getOperand(2));
166 case AMDGPUIntrinsic::AMDIL_min:
167 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
168 Op.getOperand(2));
169 case AMDGPUIntrinsic::AMDGPU_imin:
170 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
171 Op.getOperand(2));
172 case AMDGPUIntrinsic::AMDGPU_umin:
173 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
174 Op.getOperand(2));
175 case AMDGPUIntrinsic::AMDIL_round_nearest:
176 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
177 }
178}
179
180///IABS(a) = SMAX(sub(0, a), a)
181SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
182 SelectionDAG &DAG) const {
183
Andrew Trickac6d9be2013-05-25 02:42:55 +0000184 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000185 EVT VT = Op.getValueType();
186 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
187 Op.getOperand(1));
188
189 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
190}
191
192/// Linear Interpolation
193/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
194SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
195 SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000196 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000197 EVT VT = Op.getValueType();
198 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
199 DAG.getConstantFP(1.0f, MVT::f32),
200 Op.getOperand(1));
201 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
202 Op.getOperand(3));
Vincent Lejeunee3111962013-02-18 14:11:28 +0000203 return DAG.getNode(ISD::FADD, DL, VT,
204 DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
205 OneSubAC);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000206}
207
208/// \brief Generate Min/Max node
209SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
210 SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000211 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000212 EVT VT = Op.getValueType();
213
214 SDValue LHS = Op.getOperand(0);
215 SDValue RHS = Op.getOperand(1);
216 SDValue True = Op.getOperand(2);
217 SDValue False = Op.getOperand(3);
218 SDValue CC = Op.getOperand(4);
219
220 if (VT != MVT::f32 ||
221 !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
222 return SDValue();
223 }
224
225 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
226 switch (CCOpcode) {
227 case ISD::SETOEQ:
228 case ISD::SETONE:
229 case ISD::SETUNE:
230 case ISD::SETNE:
231 case ISD::SETUEQ:
232 case ISD::SETEQ:
233 case ISD::SETFALSE:
234 case ISD::SETFALSE2:
235 case ISD::SETTRUE:
236 case ISD::SETTRUE2:
237 case ISD::SETUO:
238 case ISD::SETO:
239 assert(0 && "Operation should already be optimised !");
240 case ISD::SETULE:
241 case ISD::SETULT:
242 case ISD::SETOLE:
243 case ISD::SETOLT:
244 case ISD::SETLE:
245 case ISD::SETLT: {
246 if (LHS == True)
247 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
248 else
249 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
250 }
251 case ISD::SETGT:
252 case ISD::SETGE:
253 case ISD::SETUGE:
254 case ISD::SETOGE:
255 case ISD::SETUGT:
256 case ISD::SETOGT: {
257 if (LHS == True)
258 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
259 else
260 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
261 }
262 case ISD::SETCC_INVALID:
263 assert(0 && "Invalid setcc condcode !");
264 }
265 return Op;
266}
267
268
269
270SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
271 SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000272 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000273 EVT VT = Op.getValueType();
274
275 SDValue Num = Op.getOperand(0);
276 SDValue Den = Op.getOperand(1);
277
278 SmallVector<SDValue, 8> Results;
279
280 // RCP = URECIP(Den) = 2^32 / Den + e
281 // e is rounding error.
282 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
283
284 // RCP_LO = umulo(RCP, Den) */
285 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
286
287 // RCP_HI = mulhu (RCP, Den) */
288 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
289
290 // NEG_RCP_LO = -RCP_LO
291 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
292 RCP_LO);
293
294 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
295 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
296 NEG_RCP_LO, RCP_LO,
297 ISD::SETEQ);
298 // Calculate the rounding error from the URECIP instruction
299 // E = mulhu(ABS_RCP_LO, RCP)
300 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
301
302 // RCP_A_E = RCP + E
303 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
304
305 // RCP_S_E = RCP - E
306 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
307
308 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
309 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
310 RCP_A_E, RCP_S_E,
311 ISD::SETEQ);
312 // Quotient = mulhu(Tmp0, Num)
313 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
314
315 // Num_S_Remainder = Quotient * Den
316 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
317
318 // Remainder = Num - Num_S_Remainder
319 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
320
321 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
322 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
323 DAG.getConstant(-1, VT),
324 DAG.getConstant(0, VT),
325 ISD::SETGE);
326 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
327 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
328 DAG.getConstant(0, VT),
329 DAG.getConstant(-1, VT),
330 DAG.getConstant(0, VT),
331 ISD::SETGE);
332 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
333 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
334 Remainder_GE_Zero);
335
336 // Calculate Division result:
337
338 // Quotient_A_One = Quotient + 1
339 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
340 DAG.getConstant(1, VT));
341
342 // Quotient_S_One = Quotient - 1
343 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
344 DAG.getConstant(1, VT));
345
346 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
347 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
348 Quotient, Quotient_A_One, ISD::SETEQ);
349
350 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
351 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
352 Quotient_S_One, Div, ISD::SETEQ);
353
354 // Calculate Rem result:
355
356 // Remainder_S_Den = Remainder - Den
357 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
358
359 // Remainder_A_Den = Remainder + Den
360 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
361
362 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
363 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
364 Remainder, Remainder_S_Den, ISD::SETEQ);
365
366 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
367 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
368 Remainder_A_Den, Rem, ISD::SETEQ);
369 SDValue Ops[2];
370 Ops[0] = Div;
371 Ops[1] = Rem;
372 return DAG.getMergeValues(Ops, 2, DL);
373}
374
375//===----------------------------------------------------------------------===//
376// Helper functions
377//===----------------------------------------------------------------------===//
378
379bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
380 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
381 return CFP->isExactlyValue(1.0);
382 }
383 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
384 return C->isAllOnesValue();
385 }
386 return false;
387}
388
389bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
390 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
391 return CFP->getValueAPF().isZero();
392 }
393 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
394 return C->isNullValue();
395 }
396 return false;
397}
398
399SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
400 const TargetRegisterClass *RC,
401 unsigned Reg, EVT VT) const {
402 MachineFunction &MF = DAG.getMachineFunction();
403 MachineRegisterInfo &MRI = MF.getRegInfo();
404 unsigned VirtualRegister;
405 if (!MRI.isLiveIn(Reg)) {
406 VirtualRegister = MRI.createVirtualRegister(RC);
407 MRI.addLiveIn(Reg, VirtualRegister);
408 } else {
409 VirtualRegister = MRI.getLiveInVirtReg(Reg);
410 }
411 return DAG.getRegister(VirtualRegister, VT);
412}
413
414#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
415
416const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
417 switch (Opcode) {
418 default: return 0;
419 // AMDIL DAG nodes
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000420 NODE_NAME_CASE(CALL);
421 NODE_NAME_CASE(UMUL);
422 NODE_NAME_CASE(DIV_INF);
423 NODE_NAME_CASE(RET_FLAG);
424 NODE_NAME_CASE(BRANCH_COND);
425
426 // AMDGPU DAG nodes
427 NODE_NAME_CASE(DWORDADDR)
428 NODE_NAME_CASE(FRACT)
429 NODE_NAME_CASE(FMAX)
430 NODE_NAME_CASE(SMAX)
431 NODE_NAME_CASE(UMAX)
432 NODE_NAME_CASE(FMIN)
433 NODE_NAME_CASE(SMIN)
434 NODE_NAME_CASE(UMIN)
435 NODE_NAME_CASE(URECIP)
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000436 NODE_NAME_CASE(EXPORT)
Tom Stellardc7e18882013-01-23 02:09:03 +0000437 NODE_NAME_CASE(CONST_ADDRESS)
Tom Stellardc0b0c672013-02-06 17:32:29 +0000438 NODE_NAME_CASE(REGISTER_LOAD)
439 NODE_NAME_CASE(REGISTER_STORE)
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000440 }
441}