blob: 02d6fab94d9a7984c1d4c99f2be98a0bd68a0038 [file] [log] [blame]
Tom Stellardf98f2ce2012-12-11 21:25:42 +00001//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief This is the parent TargetLowering class for hardware code gen
12/// targets.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPUISelLowering.h"
Tom Stellarde7397ee2013-06-03 17:40:11 +000017#include "AMDGPU.h"
Christian Konig90c64cb2013-03-07 09:03:52 +000018#include "AMDGPURegisterInfo.h"
Christian Konig90c64cb2013-03-07 09:03:52 +000019#include "AMDGPUSubtarget.h"
Benjamin Kramer5c352902013-05-23 17:10:37 +000020#include "AMDILIntrinsicInfo.h"
Tom Stellarde7397ee2013-06-03 17:40:11 +000021#include "SIMachineFunctionInfo.h"
Christian Konig90c64cb2013-03-07 09:03:52 +000022#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000023#include "llvm/CodeGen/MachineFunction.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/CodeGen/SelectionDAG.h"
26#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
27
28using namespace llvm;
29
Christian Konig90c64cb2013-03-07 09:03:52 +000030#include "AMDGPUGenCallingConv.inc"
31
Tom Stellardf98f2ce2012-12-11 21:25:42 +000032AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
33 TargetLowering(TM, new TargetLoweringObjectFileELF()) {
34
35 // Initialize target lowering borrowed from AMDIL
36 InitAMDILLowering();
37
38 // We need to custom lower some of the intrinsics
39 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
40
41 // Library functions. These default to Expand, but we have instructions
42 // for them.
43 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
44 setOperationAction(ISD::FEXP2, MVT::f32, Legal);
45 setOperationAction(ISD::FPOW, MVT::f32, Legal);
46 setOperationAction(ISD::FLOG2, MVT::f32, Legal);
47 setOperationAction(ISD::FABS, MVT::f32, Legal);
48 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
49 setOperationAction(ISD::FRINT, MVT::f32, Legal);
50
Tom Stellardba534c22013-05-20 15:02:19 +000051 // The hardware supports ROTR, but not ROTL
52 setOperationAction(ISD::ROTL, MVT::i32, Expand);
53
Tom Stellardf98f2ce2012-12-11 21:25:42 +000054 // Lower floating point store/load to integer store/load to reduce the number
55 // of patterns in tablegen.
56 setOperationAction(ISD::STORE, MVT::f32, Promote);
57 AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
58
59 setOperationAction(ISD::STORE, MVT::v4f32, Promote);
60 AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
61
62 setOperationAction(ISD::LOAD, MVT::f32, Promote);
63 AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
64
65 setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
66 AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
67
Christian Konig45b14e32013-03-27 09:12:51 +000068 setOperationAction(ISD::MUL, MVT::i64, Expand);
69
Tom Stellardf98f2ce2012-12-11 21:25:42 +000070 setOperationAction(ISD::UDIV, MVT::i32, Expand);
71 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
72 setOperationAction(ISD::UREM, MVT::i32, Expand);
73}
74
75//===---------------------------------------------------------------------===//
76// TargetLowering Callbacks
77//===---------------------------------------------------------------------===//
78
Christian Konig90c64cb2013-03-07 09:03:52 +000079void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
80 const SmallVectorImpl<ISD::InputArg> &Ins) const {
81
82 State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
Tom Stellardf98f2ce2012-12-11 21:25:42 +000083}
84
85SDValue AMDGPUTargetLowering::LowerReturn(
86 SDValue Chain,
87 CallingConv::ID CallConv,
88 bool isVarArg,
89 const SmallVectorImpl<ISD::OutputArg> &Outs,
90 const SmallVectorImpl<SDValue> &OutVals,
Andrew Trickac6d9be2013-05-25 02:42:55 +000091 SDLoc DL, SelectionDAG &DAG) const {
Tom Stellardf98f2ce2012-12-11 21:25:42 +000092 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
93}
94
95//===---------------------------------------------------------------------===//
96// Target specific lowering
97//===---------------------------------------------------------------------===//
98
99SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
100 const {
101 switch (Op.getOpcode()) {
102 default:
103 Op.getNode()->dump();
104 assert(0 && "Custom lowering code for this"
105 "instruction is not implemented yet!");
106 break;
107 // AMDIL DAG lowering
108 case ISD::SDIV: return LowerSDIV(Op, DAG);
109 case ISD::SREM: return LowerSREM(Op, DAG);
110 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
111 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
112 // AMDGPU DAG lowering
113 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
114 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
115 }
116 return Op;
117}
118
119SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
120 SelectionDAG &DAG) const {
121 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
Andrew Trickac6d9be2013-05-25 02:42:55 +0000122 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000123 EVT VT = Op.getValueType();
124
125 switch (IntrinsicID) {
126 default: return Op;
127 case AMDGPUIntrinsic::AMDIL_abs:
128 return LowerIntrinsicIABS(Op, DAG);
129 case AMDGPUIntrinsic::AMDIL_exp:
130 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
131 case AMDGPUIntrinsic::AMDGPU_lrp:
132 return LowerIntrinsicLRP(Op, DAG);
133 case AMDGPUIntrinsic::AMDIL_fraction:
134 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000135 case AMDGPUIntrinsic::AMDIL_max:
136 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
137 Op.getOperand(2));
138 case AMDGPUIntrinsic::AMDGPU_imax:
139 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
140 Op.getOperand(2));
141 case AMDGPUIntrinsic::AMDGPU_umax:
142 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
143 Op.getOperand(2));
144 case AMDGPUIntrinsic::AMDIL_min:
145 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
146 Op.getOperand(2));
147 case AMDGPUIntrinsic::AMDGPU_imin:
148 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
149 Op.getOperand(2));
150 case AMDGPUIntrinsic::AMDGPU_umin:
151 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
152 Op.getOperand(2));
153 case AMDGPUIntrinsic::AMDIL_round_nearest:
154 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
155 }
156}
157
158///IABS(a) = SMAX(sub(0, a), a)
159SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
160 SelectionDAG &DAG) const {
161
Andrew Trickac6d9be2013-05-25 02:42:55 +0000162 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000163 EVT VT = Op.getValueType();
164 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
165 Op.getOperand(1));
166
167 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
168}
169
170/// Linear Interpolation
171/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
172SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
173 SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000174 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000175 EVT VT = Op.getValueType();
176 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
177 DAG.getConstantFP(1.0f, MVT::f32),
178 Op.getOperand(1));
179 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
180 Op.getOperand(3));
Vincent Lejeunee3111962013-02-18 14:11:28 +0000181 return DAG.getNode(ISD::FADD, DL, VT,
182 DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
183 OneSubAC);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000184}
185
186/// \brief Generate Min/Max node
187SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
188 SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000189 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000190 EVT VT = Op.getValueType();
191
192 SDValue LHS = Op.getOperand(0);
193 SDValue RHS = Op.getOperand(1);
194 SDValue True = Op.getOperand(2);
195 SDValue False = Op.getOperand(3);
196 SDValue CC = Op.getOperand(4);
197
198 if (VT != MVT::f32 ||
199 !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
200 return SDValue();
201 }
202
203 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
204 switch (CCOpcode) {
205 case ISD::SETOEQ:
206 case ISD::SETONE:
207 case ISD::SETUNE:
208 case ISD::SETNE:
209 case ISD::SETUEQ:
210 case ISD::SETEQ:
211 case ISD::SETFALSE:
212 case ISD::SETFALSE2:
213 case ISD::SETTRUE:
214 case ISD::SETTRUE2:
215 case ISD::SETUO:
216 case ISD::SETO:
217 assert(0 && "Operation should already be optimised !");
218 case ISD::SETULE:
219 case ISD::SETULT:
220 case ISD::SETOLE:
221 case ISD::SETOLT:
222 case ISD::SETLE:
223 case ISD::SETLT: {
224 if (LHS == True)
225 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
226 else
227 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
228 }
229 case ISD::SETGT:
230 case ISD::SETGE:
231 case ISD::SETUGE:
232 case ISD::SETOGE:
233 case ISD::SETUGT:
234 case ISD::SETOGT: {
235 if (LHS == True)
236 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
237 else
238 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
239 }
240 case ISD::SETCC_INVALID:
241 assert(0 && "Invalid setcc condcode !");
242 }
243 return Op;
244}
245
246
247
248SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
249 SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000250 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000251 EVT VT = Op.getValueType();
252
253 SDValue Num = Op.getOperand(0);
254 SDValue Den = Op.getOperand(1);
255
256 SmallVector<SDValue, 8> Results;
257
258 // RCP = URECIP(Den) = 2^32 / Den + e
259 // e is rounding error.
260 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
261
262 // RCP_LO = umulo(RCP, Den) */
263 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
264
265 // RCP_HI = mulhu (RCP, Den) */
266 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
267
268 // NEG_RCP_LO = -RCP_LO
269 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
270 RCP_LO);
271
272 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
273 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
274 NEG_RCP_LO, RCP_LO,
275 ISD::SETEQ);
276 // Calculate the rounding error from the URECIP instruction
277 // E = mulhu(ABS_RCP_LO, RCP)
278 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
279
280 // RCP_A_E = RCP + E
281 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
282
283 // RCP_S_E = RCP - E
284 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
285
286 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
287 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
288 RCP_A_E, RCP_S_E,
289 ISD::SETEQ);
290 // Quotient = mulhu(Tmp0, Num)
291 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
292
293 // Num_S_Remainder = Quotient * Den
294 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
295
296 // Remainder = Num - Num_S_Remainder
297 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
298
299 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
300 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
301 DAG.getConstant(-1, VT),
302 DAG.getConstant(0, VT),
303 ISD::SETGE);
304 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
305 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
306 DAG.getConstant(0, VT),
307 DAG.getConstant(-1, VT),
308 DAG.getConstant(0, VT),
309 ISD::SETGE);
310 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
311 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
312 Remainder_GE_Zero);
313
314 // Calculate Division result:
315
316 // Quotient_A_One = Quotient + 1
317 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
318 DAG.getConstant(1, VT));
319
320 // Quotient_S_One = Quotient - 1
321 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
322 DAG.getConstant(1, VT));
323
324 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
325 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
326 Quotient, Quotient_A_One, ISD::SETEQ);
327
328 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
329 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
330 Quotient_S_One, Div, ISD::SETEQ);
331
332 // Calculate Rem result:
333
334 // Remainder_S_Den = Remainder - Den
335 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
336
337 // Remainder_A_Den = Remainder + Den
338 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
339
340 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
341 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
342 Remainder, Remainder_S_Den, ISD::SETEQ);
343
344 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
345 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
346 Remainder_A_Den, Rem, ISD::SETEQ);
347 SDValue Ops[2];
348 Ops[0] = Div;
349 Ops[1] = Rem;
350 return DAG.getMergeValues(Ops, 2, DL);
351}
352
353//===----------------------------------------------------------------------===//
354// Helper functions
355//===----------------------------------------------------------------------===//
356
357bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
358 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
359 return CFP->isExactlyValue(1.0);
360 }
361 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
362 return C->isAllOnesValue();
363 }
364 return false;
365}
366
367bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
368 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
369 return CFP->getValueAPF().isZero();
370 }
371 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
372 return C->isNullValue();
373 }
374 return false;
375}
376
377SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
378 const TargetRegisterClass *RC,
379 unsigned Reg, EVT VT) const {
380 MachineFunction &MF = DAG.getMachineFunction();
381 MachineRegisterInfo &MRI = MF.getRegInfo();
382 unsigned VirtualRegister;
383 if (!MRI.isLiveIn(Reg)) {
384 VirtualRegister = MRI.createVirtualRegister(RC);
385 MRI.addLiveIn(Reg, VirtualRegister);
386 } else {
387 VirtualRegister = MRI.getLiveInVirtReg(Reg);
388 }
389 return DAG.getRegister(VirtualRegister, VT);
390}
391
392#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
393
394const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
395 switch (Opcode) {
396 default: return 0;
397 // AMDIL DAG nodes
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000398 NODE_NAME_CASE(CALL);
399 NODE_NAME_CASE(UMUL);
400 NODE_NAME_CASE(DIV_INF);
401 NODE_NAME_CASE(RET_FLAG);
402 NODE_NAME_CASE(BRANCH_COND);
403
404 // AMDGPU DAG nodes
405 NODE_NAME_CASE(DWORDADDR)
406 NODE_NAME_CASE(FRACT)
407 NODE_NAME_CASE(FMAX)
408 NODE_NAME_CASE(SMAX)
409 NODE_NAME_CASE(UMAX)
410 NODE_NAME_CASE(FMIN)
411 NODE_NAME_CASE(SMIN)
412 NODE_NAME_CASE(UMIN)
413 NODE_NAME_CASE(URECIP)
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000414 NODE_NAME_CASE(EXPORT)
Tom Stellardc7e18882013-01-23 02:09:03 +0000415 NODE_NAME_CASE(CONST_ADDRESS)
Tom Stellardc0b0c672013-02-06 17:32:29 +0000416 NODE_NAME_CASE(REGISTER_LOAD)
417 NODE_NAME_CASE(REGISTER_STORE)
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000418 }
419}