blob: bde6894cec39c6a5aa0721548e74816942677d82 [file] [log] [blame]
//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This is the parent TargetLowering class for hardware code gen
/// targets.
//
//===----------------------------------------------------------------------===//
15
16#include "AMDGPUISelLowering.h"
Tom Stellarde7397ee2013-06-03 17:40:11 +000017#include "AMDGPU.h"
Christian Konig90c64cb2013-03-07 09:03:52 +000018#include "AMDGPURegisterInfo.h"
Christian Konig90c64cb2013-03-07 09:03:52 +000019#include "AMDGPUSubtarget.h"
Benjamin Kramer5c352902013-05-23 17:10:37 +000020#include "AMDILIntrinsicInfo.h"
Tom Stellardf502c292013-07-23 01:48:05 +000021#include "R600MachineFunctionInfo.h"
Tom Stellarde7397ee2013-06-03 17:40:11 +000022#include "SIMachineFunctionInfo.h"
Christian Konig90c64cb2013-03-07 09:03:52 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000024#include "llvm/CodeGen/MachineFunction.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/SelectionDAG.h"
27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
Tom Stellarde3d4cbc2013-06-28 15:47:08 +000028#include "llvm/IR/DataLayout.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000029
30using namespace llvm;
Tom Stellardf95b1622013-10-23 00:44:32 +000031static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
32 CCValAssign::LocInfo LocInfo,
33 ISD::ArgFlagsTy ArgFlags, CCState &State) {
34 unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
35 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
36
37 return true;
38}
Tom Stellardf98f2ce2012-12-11 21:25:42 +000039
Christian Konig90c64cb2013-03-07 09:03:52 +000040#include "AMDGPUGenCallingConv.inc"
41
Tom Stellardf98f2ce2012-12-11 21:25:42 +000042AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
43 TargetLowering(TM, new TargetLoweringObjectFileELF()) {
44
45 // Initialize target lowering borrowed from AMDIL
46 InitAMDILLowering();
47
48 // We need to custom lower some of the intrinsics
49 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
50
51 // Library functions. These default to Expand, but we have instructions
52 // for them.
53 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
54 setOperationAction(ISD::FEXP2, MVT::f32, Legal);
55 setOperationAction(ISD::FPOW, MVT::f32, Legal);
56 setOperationAction(ISD::FLOG2, MVT::f32, Legal);
57 setOperationAction(ISD::FABS, MVT::f32, Legal);
58 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
59 setOperationAction(ISD::FRINT, MVT::f32, Legal);
60
Tom Stellardba534c22013-05-20 15:02:19 +000061 // The hardware supports ROTR, but not ROTL
62 setOperationAction(ISD::ROTL, MVT::i32, Expand);
63
Tom Stellardf98f2ce2012-12-11 21:25:42 +000064 // Lower floating point store/load to integer store/load to reduce the number
65 // of patterns in tablegen.
66 setOperationAction(ISD::STORE, MVT::f32, Promote);
67 AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
68
Tom Stellardfc047272013-07-18 21:43:42 +000069 setOperationAction(ISD::STORE, MVT::v2f32, Promote);
70 AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
71
Tom Stellardf98f2ce2012-12-11 21:25:42 +000072 setOperationAction(ISD::STORE, MVT::v4f32, Promote);
73 AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
74
Tom Stellardf95b1622013-10-23 00:44:32 +000075 setOperationAction(ISD::STORE, MVT::v8f32, Promote);
76 AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
77
78 setOperationAction(ISD::STORE, MVT::v16f32, Promote);
79 AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
80
Tom Stellard68e13282013-07-12 18:14:56 +000081 setOperationAction(ISD::STORE, MVT::f64, Promote);
82 AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
83
Tom Stellard7a0282d2013-08-26 15:05:44 +000084 // Custom lowering of vector stores is required for local address space
85 // stores.
86 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
87 // XXX: Native v2i32 local address space stores are possible, but not
88 // currently implemented.
89 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
90
Tom Stellard4c52d452013-08-16 01:12:11 +000091 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
92 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
93 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
94 // XXX: This can be change to Custom, once ExpandVectorStores can
95 // handle 64-bit stores.
96 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
97
Tom Stellardf98f2ce2012-12-11 21:25:42 +000098 setOperationAction(ISD::LOAD, MVT::f32, Promote);
99 AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
100
Tom Stellardac85f3f2013-07-18 21:43:48 +0000101 setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
102 AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
103
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000104 setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
105 AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
106
Tom Stellardf95b1622013-10-23 00:44:32 +0000107 setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
108 AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
109
110 setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
111 AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
112
Tom Stellard68e13282013-07-12 18:14:56 +0000113 setOperationAction(ISD::LOAD, MVT::f64, Promote);
114 AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
115
Tom Stellarda41520c2013-08-14 23:25:00 +0000116 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
117 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
118 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
119 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
Tom Stellard692ee102013-08-01 15:23:42 +0000120
Tom Stellard30d84d82013-08-16 01:12:16 +0000121 setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
122 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
123 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
124 setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
125 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
126 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
127 setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
128 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
129 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
130 setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
131 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
132 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);
133
Tom Stellardd7a472c2013-07-23 01:47:46 +0000134 setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
135 setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
136
Christian Konig45b14e32013-03-27 09:12:51 +0000137 setOperationAction(ISD::MUL, MVT::i64, Expand);
138
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000139 setOperationAction(ISD::UDIV, MVT::i32, Expand);
140 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
141 setOperationAction(ISD::UREM, MVT::i32, Expand);
Tom Stellardf5660aa2013-07-18 21:43:35 +0000142 setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
143 setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000144
Tom Stellard5464a922013-08-21 22:14:17 +0000145 static const MVT::SimpleValueType IntTypes[] = {
146 MVT::v2i32, MVT::v4i32
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000147 };
Tom Stellard0991c312013-08-16 23:51:24 +0000148 const size_t NumIntTypes = array_lengthof(IntTypes);
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000149
Tom Stellard0991c312013-08-16 23:51:24 +0000150 for (unsigned int x = 0; x < NumIntTypes; ++x) {
Tom Stellard5464a922013-08-21 22:14:17 +0000151 MVT::SimpleValueType VT = IntTypes[x];
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000152 //Expand the following operations for the current type by default
153 setOperationAction(ISD::ADD, VT, Expand);
154 setOperationAction(ISD::AND, VT, Expand);
Tom Stellarde3d60ac2013-07-30 14:31:03 +0000155 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
156 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000157 setOperationAction(ISD::MUL, VT, Expand);
158 setOperationAction(ISD::OR, VT, Expand);
159 setOperationAction(ISD::SHL, VT, Expand);
Tom Stellarde3d60ac2013-07-30 14:31:03 +0000160 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000161 setOperationAction(ISD::SRL, VT, Expand);
162 setOperationAction(ISD::SRA, VT, Expand);
163 setOperationAction(ISD::SUB, VT, Expand);
164 setOperationAction(ISD::UDIV, VT, Expand);
Tom Stellarde3d60ac2013-07-30 14:31:03 +0000165 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000166 setOperationAction(ISD::UREM, VT, Expand);
Tom Stellardf5660aa2013-07-18 21:43:35 +0000167 setOperationAction(ISD::VSELECT, VT, Expand);
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000168 setOperationAction(ISD::XOR, VT, Expand);
169 }
Tom Stellard0991c312013-08-16 23:51:24 +0000170
Tom Stellard5464a922013-08-21 22:14:17 +0000171 static const MVT::SimpleValueType FloatTypes[] = {
172 MVT::v2f32, MVT::v4f32
Tom Stellard0991c312013-08-16 23:51:24 +0000173 };
174 const size_t NumFloatTypes = array_lengthof(FloatTypes);
175
176 for (unsigned int x = 0; x < NumFloatTypes; ++x) {
Tom Stellard5464a922013-08-21 22:14:17 +0000177 MVT::SimpleValueType VT = FloatTypes[x];
Tom Stellard0991c312013-08-16 23:51:24 +0000178 setOperationAction(ISD::FADD, VT, Expand);
179 setOperationAction(ISD::FDIV, VT, Expand);
Tom Stellard84c0bd92013-08-16 23:51:29 +0000180 setOperationAction(ISD::FFLOOR, VT, Expand);
Tom Stellard0991c312013-08-16 23:51:24 +0000181 setOperationAction(ISD::FMUL, VT, Expand);
Tom Stellard3cae8232013-08-16 23:51:33 +0000182 setOperationAction(ISD::FRINT, VT, Expand);
Tom Stellardf54a8402013-10-29 16:37:20 +0000183 setOperationAction(ISD::FSQRT, VT, Expand);
Tom Stellard0991c312013-08-16 23:51:24 +0000184 setOperationAction(ISD::FSUB, VT, Expand);
185 }
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000186}
187
Tom Stellard2b272a12013-08-05 22:22:07 +0000188//===----------------------------------------------------------------------===//
189// Target Information
190//===----------------------------------------------------------------------===//
191
192MVT AMDGPUTargetLowering::getVectorIdxTy() const {
193 return MVT::i32;
194}
195
196
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000197//===---------------------------------------------------------------------===//
Tom Stellard1f67c632013-07-23 23:55:03 +0000198// Target Properties
199//===---------------------------------------------------------------------===//
200
201bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
202 assert(VT.isFloatingPoint());
203 return VT == MVT::f32;
204}
205
206bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
207 assert(VT.isFloatingPoint());
208 return VT == MVT::f32;
209}
210
211//===---------------------------------------------------------------------===//
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000212// TargetLowering Callbacks
213//===---------------------------------------------------------------------===//
214
Christian Konig90c64cb2013-03-07 09:03:52 +0000215void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
216 const SmallVectorImpl<ISD::InputArg> &Ins) const {
217
218 State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000219}
220
221SDValue AMDGPUTargetLowering::LowerReturn(
222 SDValue Chain,
223 CallingConv::ID CallConv,
224 bool isVarArg,
225 const SmallVectorImpl<ISD::OutputArg> &Outs,
226 const SmallVectorImpl<SDValue> &OutVals,
Andrew Trickac6d9be2013-05-25 02:42:55 +0000227 SDLoc DL, SelectionDAG &DAG) const {
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000228 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
229}
230
231//===---------------------------------------------------------------------===//
232// Target specific lowering
233//===---------------------------------------------------------------------===//
234
235SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
236 const {
237 switch (Op.getOpcode()) {
238 default:
239 Op.getNode()->dump();
240 assert(0 && "Custom lowering code for this"
241 "instruction is not implemented yet!");
242 break;
243 // AMDIL DAG lowering
244 case ISD::SDIV: return LowerSDIV(Op, DAG);
245 case ISD::SREM: return LowerSREM(Op, DAG);
246 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
247 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
248 // AMDGPU DAG lowering
Tom Stellarda41520c2013-08-14 23:25:00 +0000249 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
250 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000251 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
Tom Stellard7a0282d2013-08-26 15:05:44 +0000252 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000253 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
254 }
255 return Op;
256}
257
Tom Stellarde3d4cbc2013-06-28 15:47:08 +0000258SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
259 SDValue Op,
260 SelectionDAG &DAG) const {
261
262 const DataLayout *TD = getTargetMachine().getDataLayout();
263 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
Tom Stellardda25cd32013-08-26 15:05:36 +0000264
265 assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
Tom Stellarde3d4cbc2013-06-28 15:47:08 +0000266 // XXX: What does the value of G->getOffset() mean?
267 assert(G->getOffset() == 0 &&
268 "Do not know what to do with an non-zero offset");
269
Tom Stellarde3d4cbc2013-06-28 15:47:08 +0000270 const GlobalValue *GV = G->getGlobal();
Tom Stellarde3d4cbc2013-06-28 15:47:08 +0000271
Tom Stellard470c4512013-09-05 18:37:57 +0000272 unsigned Offset;
273 if (MFI->LocalMemoryObjects.count(GV) == 0) {
274 uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
275 Offset = MFI->LDSSize;
276 MFI->LocalMemoryObjects[GV] = Offset;
277 // XXX: Account for alignment?
278 MFI->LDSSize += Size;
279 } else {
280 Offset = MFI->LocalMemoryObjects[GV];
281 }
Tom Stellarde3d4cbc2013-06-28 15:47:08 +0000282
Tom Stellardda25cd32013-08-26 15:05:36 +0000283 return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
Tom Stellarde3d4cbc2013-06-28 15:47:08 +0000284}
285
Tom Stellarda41520c2013-08-14 23:25:00 +0000286void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
287 SmallVectorImpl<SDValue> &Args,
288 unsigned Start,
289 unsigned Count) const {
290 EVT VT = Op.getValueType();
291 for (unsigned i = Start, e = Start + Count; i != e; ++i) {
292 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
293 VT.getVectorElementType(),
294 Op, DAG.getConstant(i, MVT::i32)));
295 }
296}
297
298SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
299 SelectionDAG &DAG) const {
300 SmallVector<SDValue, 8> Args;
301 SDValue A = Op.getOperand(0);
302 SDValue B = Op.getOperand(1);
303
304 ExtractVectorElements(A, DAG, Args, 0,
305 A.getValueType().getVectorNumElements());
306 ExtractVectorElements(B, DAG, Args, 0,
307 B.getValueType().getVectorNumElements());
308
309 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
310 &Args[0], Args.size());
311}
312
313SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
314 SelectionDAG &DAG) const {
315
316 SmallVector<SDValue, 8> Args;
317 EVT VT = Op.getValueType();
318 unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
319 ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
320 VT.getVectorNumElements());
321
322 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
323 &Args[0], Args.size());
324}
325
326
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000327SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
328 SelectionDAG &DAG) const {
329 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
Andrew Trickac6d9be2013-05-25 02:42:55 +0000330 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000331 EVT VT = Op.getValueType();
332
333 switch (IntrinsicID) {
334 default: return Op;
335 case AMDGPUIntrinsic::AMDIL_abs:
336 return LowerIntrinsicIABS(Op, DAG);
337 case AMDGPUIntrinsic::AMDIL_exp:
338 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
339 case AMDGPUIntrinsic::AMDGPU_lrp:
340 return LowerIntrinsicLRP(Op, DAG);
341 case AMDGPUIntrinsic::AMDIL_fraction:
342 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000343 case AMDGPUIntrinsic::AMDIL_max:
344 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
345 Op.getOperand(2));
346 case AMDGPUIntrinsic::AMDGPU_imax:
347 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
348 Op.getOperand(2));
349 case AMDGPUIntrinsic::AMDGPU_umax:
350 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
351 Op.getOperand(2));
352 case AMDGPUIntrinsic::AMDIL_min:
353 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
354 Op.getOperand(2));
355 case AMDGPUIntrinsic::AMDGPU_imin:
356 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
357 Op.getOperand(2));
358 case AMDGPUIntrinsic::AMDGPU_umin:
359 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
360 Op.getOperand(2));
361 case AMDGPUIntrinsic::AMDIL_round_nearest:
362 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
363 }
364}
365
366///IABS(a) = SMAX(sub(0, a), a)
367SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
368 SelectionDAG &DAG) const {
369
Andrew Trickac6d9be2013-05-25 02:42:55 +0000370 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000371 EVT VT = Op.getValueType();
372 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
373 Op.getOperand(1));
374
375 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
376}
377
378/// Linear Interpolation
379/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
380SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
381 SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000382 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000383 EVT VT = Op.getValueType();
384 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
385 DAG.getConstantFP(1.0f, MVT::f32),
386 Op.getOperand(1));
387 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
388 Op.getOperand(3));
Vincent Lejeunee3111962013-02-18 14:11:28 +0000389 return DAG.getNode(ISD::FADD, DL, VT,
390 DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
391 OneSubAC);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000392}
393
394/// \brief Generate Min/Max node
395SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
396 SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000397 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000398 EVT VT = Op.getValueType();
399
400 SDValue LHS = Op.getOperand(0);
401 SDValue RHS = Op.getOperand(1);
402 SDValue True = Op.getOperand(2);
403 SDValue False = Op.getOperand(3);
404 SDValue CC = Op.getOperand(4);
405
406 if (VT != MVT::f32 ||
407 !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
408 return SDValue();
409 }
410
411 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
412 switch (CCOpcode) {
413 case ISD::SETOEQ:
414 case ISD::SETONE:
415 case ISD::SETUNE:
416 case ISD::SETNE:
417 case ISD::SETUEQ:
418 case ISD::SETEQ:
419 case ISD::SETFALSE:
420 case ISD::SETFALSE2:
421 case ISD::SETTRUE:
422 case ISD::SETTRUE2:
423 case ISD::SETUO:
424 case ISD::SETO:
425 assert(0 && "Operation should already be optimised !");
426 case ISD::SETULE:
427 case ISD::SETULT:
428 case ISD::SETOLE:
429 case ISD::SETOLT:
430 case ISD::SETLE:
431 case ISD::SETLT: {
432 if (LHS == True)
433 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
434 else
435 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
436 }
437 case ISD::SETGT:
438 case ISD::SETGE:
439 case ISD::SETUGE:
440 case ISD::SETOGE:
441 case ISD::SETUGT:
442 case ISD::SETOGT: {
443 if (LHS == True)
444 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
445 else
446 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
447 }
448 case ISD::SETCC_INVALID:
449 assert(0 && "Invalid setcc condcode !");
450 }
451 return Op;
452}
453
Tom Stellardd08a9302013-08-26 15:06:04 +0000454SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
455 SelectionDAG &DAG) const {
456 LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
457 EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
458 EVT EltVT = Op.getValueType().getVectorElementType();
459 EVT PtrVT = Load->getBasePtr().getValueType();
460 unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
461 SmallVector<SDValue, 8> Loads;
462 SDLoc SL(Op);
463
464 for (unsigned i = 0, e = NumElts; i != e; ++i) {
465 SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
466 DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
467 Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
468 Load->getChain(), Ptr,
469 MachinePointerInfo(Load->getMemOperand()->getValue()),
470 MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
471 Load->getAlignment()));
472 }
473 return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
474 Loads.size());
475}
476
Tom Stellard7a0282d2013-08-26 15:05:44 +0000477SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
478 SelectionDAG &DAG) const {
479 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
480 EVT MemVT = Store->getMemoryVT();
481 unsigned MemBits = MemVT.getSizeInBits();
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000482
Tom Stellard7a0282d2013-08-26 15:05:44 +0000483 // Byte stores are really expensive, so if possible, try to pack
484 // 32-bit vector truncatating store into an i32 store.
485 // XXX: We could also handle optimize other vector bitwidths
486 if (!MemVT.isVector() || MemBits > 32) {
487 return SDValue();
488 }
489
490 SDLoc DL(Op);
491 const SDValue &Value = Store->getValue();
492 EVT VT = Value.getValueType();
493 const SDValue &Ptr = Store->getBasePtr();
494 EVT MemEltVT = MemVT.getVectorElementType();
495 unsigned MemEltBits = MemEltVT.getSizeInBits();
496 unsigned MemNumElements = MemVT.getVectorNumElements();
497 EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
498 SDValue Mask;
499 switch(MemEltBits) {
500 case 8:
501 Mask = DAG.getConstant(0xFF, PackedVT);
502 break;
503 case 16:
504 Mask = DAG.getConstant(0xFFFF, PackedVT);
505 break;
506 default:
507 llvm_unreachable("Cannot lower this vector store");
508 }
509 SDValue PackedValue;
510 for (unsigned i = 0; i < MemNumElements; ++i) {
511 EVT ElemVT = VT.getVectorElementType();
512 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
513 DAG.getConstant(i, MVT::i32));
514 Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
515 Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
516 SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
517 Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
518 if (i == 0) {
519 PackedValue = Elt;
520 } else {
521 PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
522 }
523 }
524 return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
525 MachinePointerInfo(Store->getMemOperand()->getValue()),
526 Store->isVolatile(), Store->isNonTemporal(),
527 Store->getAlignment());
528}
529
530SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
531 SelectionDAG &DAG) const {
532 StoreSDNode *Store = cast<StoreSDNode>(Op);
533 EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
534 EVT EltVT = Store->getValue().getValueType().getVectorElementType();
535 EVT PtrVT = Store->getBasePtr().getValueType();
536 unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
537 SDLoc SL(Op);
538
539 SmallVector<SDValue, 8> Chains;
540
541 for (unsigned i = 0, e = NumElts; i != e; ++i) {
542 SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
543 Store->getValue(), DAG.getConstant(i, MVT::i32));
544 SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
545 Store->getBasePtr(),
546 DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
547 PtrVT));
Tom Stellard8e780122013-08-26 15:05:49 +0000548 Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
Tom Stellard7a0282d2013-08-26 15:05:44 +0000549 MachinePointerInfo(Store->getMemOperand()->getValue()),
Tom Stellard8e780122013-08-26 15:05:49 +0000550 MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
Tom Stellard7a0282d2013-08-26 15:05:44 +0000551 Store->getAlignment()));
552 }
553 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
554}
555
556SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
557 SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
558 if (Result.getNode()) {
559 return Result;
560 }
561
562 StoreSDNode *Store = cast<StoreSDNode>(Op);
563 if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
564 Store->getValue().getValueType().isVector()) {
565 return SplitVectorStore(Op, DAG);
566 }
567 return SDValue();
568}
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000569
570SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
571 SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000572 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000573 EVT VT = Op.getValueType();
574
575 SDValue Num = Op.getOperand(0);
576 SDValue Den = Op.getOperand(1);
577
578 SmallVector<SDValue, 8> Results;
579
580 // RCP = URECIP(Den) = 2^32 / Den + e
581 // e is rounding error.
582 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
583
584 // RCP_LO = umulo(RCP, Den) */
585 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
586
587 // RCP_HI = mulhu (RCP, Den) */
588 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
589
590 // NEG_RCP_LO = -RCP_LO
591 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
592 RCP_LO);
593
594 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
595 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
596 NEG_RCP_LO, RCP_LO,
597 ISD::SETEQ);
598 // Calculate the rounding error from the URECIP instruction
599 // E = mulhu(ABS_RCP_LO, RCP)
600 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
601
602 // RCP_A_E = RCP + E
603 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
604
605 // RCP_S_E = RCP - E
606 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
607
608 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
609 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
610 RCP_A_E, RCP_S_E,
611 ISD::SETEQ);
612 // Quotient = mulhu(Tmp0, Num)
613 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
614
615 // Num_S_Remainder = Quotient * Den
616 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
617
618 // Remainder = Num - Num_S_Remainder
619 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
620
621 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
622 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
623 DAG.getConstant(-1, VT),
624 DAG.getConstant(0, VT),
625 ISD::SETGE);
626 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
627 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
628 DAG.getConstant(0, VT),
629 DAG.getConstant(-1, VT),
630 DAG.getConstant(0, VT),
631 ISD::SETGE);
632 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
633 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
634 Remainder_GE_Zero);
635
636 // Calculate Division result:
637
638 // Quotient_A_One = Quotient + 1
639 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
640 DAG.getConstant(1, VT));
641
642 // Quotient_S_One = Quotient - 1
643 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
644 DAG.getConstant(1, VT));
645
646 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
647 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
648 Quotient, Quotient_A_One, ISD::SETEQ);
649
650 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
651 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
652 Quotient_S_One, Div, ISD::SETEQ);
653
654 // Calculate Rem result:
655
656 // Remainder_S_Den = Remainder - Den
657 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
658
659 // Remainder_A_Den = Remainder + Den
660 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
661
662 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
663 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
664 Remainder, Remainder_S_Den, ISD::SETEQ);
665
666 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
667 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
668 Remainder_A_Den, Rem, ISD::SETEQ);
669 SDValue Ops[2];
670 Ops[0] = Div;
671 Ops[1] = Rem;
672 return DAG.getMergeValues(Ops, 2, DL);
673}
674
Tom Stellard4c52d452013-08-16 01:12:11 +0000675
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000676//===----------------------------------------------------------------------===//
677// Helper functions
678//===----------------------------------------------------------------------===//
679
Tom Stellardf95b1622013-10-23 00:44:32 +0000680void AMDGPUTargetLowering::getOriginalFunctionArgs(
681 SelectionDAG &DAG,
682 const Function *F,
683 const SmallVectorImpl<ISD::InputArg> &Ins,
684 SmallVectorImpl<ISD::InputArg> &OrigIns) const {
685
686 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
687 if (Ins[i].ArgVT == Ins[i].VT) {
688 OrigIns.push_back(Ins[i]);
689 continue;
690 }
691
692 EVT VT;
693 if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
694 // Vector has been split into scalars.
695 VT = Ins[i].ArgVT.getVectorElementType();
696 } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
697 Ins[i].ArgVT.getVectorElementType() !=
698 Ins[i].VT.getVectorElementType()) {
699 // Vector elements have been promoted
700 VT = Ins[i].ArgVT;
701 } else {
702 // Vector has been spilt into smaller vectors.
703 VT = Ins[i].VT;
704 }
705
706 ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
707 Ins[i].OrigArgIndex, Ins[i].PartOffset);
708 OrigIns.push_back(Arg);
709 }
710}
711
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000712bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
713 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
714 return CFP->isExactlyValue(1.0);
715 }
716 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
717 return C->isAllOnesValue();
718 }
719 return false;
720}
721
722bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
723 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
724 return CFP->getValueAPF().isZero();
725 }
726 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
727 return C->isNullValue();
728 }
729 return false;
730}
731
732SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
733 const TargetRegisterClass *RC,
734 unsigned Reg, EVT VT) const {
735 MachineFunction &MF = DAG.getMachineFunction();
736 MachineRegisterInfo &MRI = MF.getRegInfo();
737 unsigned VirtualRegister;
738 if (!MRI.isLiveIn(Reg)) {
739 VirtualRegister = MRI.createVirtualRegister(RC);
740 MRI.addLiveIn(Reg, VirtualRegister);
741 } else {
742 VirtualRegister = MRI.getLiveInVirtReg(Reg);
743 }
744 return DAG.getRegister(VirtualRegister, VT);
745}
746
747#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
748
749const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
750 switch (Opcode) {
751 default: return 0;
752 // AMDIL DAG nodes
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000753 NODE_NAME_CASE(CALL);
754 NODE_NAME_CASE(UMUL);
755 NODE_NAME_CASE(DIV_INF);
756 NODE_NAME_CASE(RET_FLAG);
757 NODE_NAME_CASE(BRANCH_COND);
758
759 // AMDGPU DAG nodes
760 NODE_NAME_CASE(DWORDADDR)
761 NODE_NAME_CASE(FRACT)
762 NODE_NAME_CASE(FMAX)
763 NODE_NAME_CASE(SMAX)
764 NODE_NAME_CASE(UMAX)
765 NODE_NAME_CASE(FMIN)
766 NODE_NAME_CASE(SMIN)
767 NODE_NAME_CASE(UMIN)
768 NODE_NAME_CASE(URECIP)
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000769 NODE_NAME_CASE(EXPORT)
Tom Stellardc7e18882013-01-23 02:09:03 +0000770 NODE_NAME_CASE(CONST_ADDRESS)
Tom Stellardc0b0c672013-02-06 17:32:29 +0000771 NODE_NAME_CASE(REGISTER_LOAD)
772 NODE_NAME_CASE(REGISTER_STORE)
Tom Stellard68db37b2013-08-14 23:24:45 +0000773 NODE_NAME_CASE(LOAD_CONSTANT)
774 NODE_NAME_CASE(LOAD_INPUT)
775 NODE_NAME_CASE(SAMPLE)
776 NODE_NAME_CASE(SAMPLEB)
777 NODE_NAME_CASE(SAMPLED)
778 NODE_NAME_CASE(SAMPLEL)
Tom Stellardec484272013-08-16 01:12:06 +0000779 NODE_NAME_CASE(STORE_MSKOR)
Tom Stellarda3c2bcf2013-09-12 02:55:14 +0000780 NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000781 }
782}