blob: b442c254732513bb543c4c98d6e36a2397bc146d [file] [log] [blame]
Tom Stellardf98f2ce2012-12-11 21:25:42 +00001//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief This is the parent TargetLowering class for hardware code gen
12/// targets.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPUISelLowering.h"
Tom Stellarde7397ee2013-06-03 17:40:11 +000017#include "AMDGPU.h"
Christian Konig90c64cb2013-03-07 09:03:52 +000018#include "AMDGPURegisterInfo.h"
Christian Konig90c64cb2013-03-07 09:03:52 +000019#include "AMDGPUSubtarget.h"
Benjamin Kramer5c352902013-05-23 17:10:37 +000020#include "AMDILIntrinsicInfo.h"
Tom Stellardf502c292013-07-23 01:48:05 +000021#include "R600MachineFunctionInfo.h"
Tom Stellarde7397ee2013-06-03 17:40:11 +000022#include "SIMachineFunctionInfo.h"
Christian Konig90c64cb2013-03-07 09:03:52 +000023#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000024#include "llvm/CodeGen/MachineFunction.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/SelectionDAG.h"
27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
Tom Stellarde3d4cbc2013-06-28 15:47:08 +000028#include "llvm/IR/DataLayout.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000029
30using namespace llvm;
Tom Stellardf95b1622013-10-23 00:44:32 +000031static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
32 CCValAssign::LocInfo LocInfo,
33 ISD::ArgFlagsTy ArgFlags, CCState &State) {
34 unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
35 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
36
37 return true;
38}
Tom Stellardf98f2ce2012-12-11 21:25:42 +000039
Christian Konig90c64cb2013-03-07 09:03:52 +000040#include "AMDGPUGenCallingConv.inc"
41
Tom Stellardf98f2ce2012-12-11 21:25:42 +000042AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
43 TargetLowering(TM, new TargetLoweringObjectFileELF()) {
44
45 // Initialize target lowering borrowed from AMDIL
46 InitAMDILLowering();
47
48 // We need to custom lower some of the intrinsics
49 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
50
51 // Library functions. These default to Expand, but we have instructions
52 // for them.
53 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
54 setOperationAction(ISD::FEXP2, MVT::f32, Legal);
55 setOperationAction(ISD::FPOW, MVT::f32, Legal);
56 setOperationAction(ISD::FLOG2, MVT::f32, Legal);
57 setOperationAction(ISD::FABS, MVT::f32, Legal);
58 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
59 setOperationAction(ISD::FRINT, MVT::f32, Legal);
60
Tom Stellardba534c22013-05-20 15:02:19 +000061 // The hardware supports ROTR, but not ROTL
62 setOperationAction(ISD::ROTL, MVT::i32, Expand);
63
Tom Stellardf98f2ce2012-12-11 21:25:42 +000064 // Lower floating point store/load to integer store/load to reduce the number
65 // of patterns in tablegen.
66 setOperationAction(ISD::STORE, MVT::f32, Promote);
67 AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
68
Tom Stellardfc047272013-07-18 21:43:42 +000069 setOperationAction(ISD::STORE, MVT::v2f32, Promote);
70 AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
71
Tom Stellardf98f2ce2012-12-11 21:25:42 +000072 setOperationAction(ISD::STORE, MVT::v4f32, Promote);
73 AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
74
Tom Stellardf95b1622013-10-23 00:44:32 +000075 setOperationAction(ISD::STORE, MVT::v8f32, Promote);
76 AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
77
78 setOperationAction(ISD::STORE, MVT::v16f32, Promote);
79 AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
80
Tom Stellard68e13282013-07-12 18:14:56 +000081 setOperationAction(ISD::STORE, MVT::f64, Promote);
82 AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
83
Tom Stellard7a0282d2013-08-26 15:05:44 +000084 // Custom lowering of vector stores is required for local address space
85 // stores.
86 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
87 // XXX: Native v2i32 local address space stores are possible, but not
88 // currently implemented.
89 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
90
Tom Stellard4c52d452013-08-16 01:12:11 +000091 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
92 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
93 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
94 // XXX: This can be change to Custom, once ExpandVectorStores can
95 // handle 64-bit stores.
96 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
97
Tom Stellardf98f2ce2012-12-11 21:25:42 +000098 setOperationAction(ISD::LOAD, MVT::f32, Promote);
99 AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
100
Tom Stellardac85f3f2013-07-18 21:43:48 +0000101 setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
102 AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
103
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000104 setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
105 AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
106
Tom Stellardf95b1622013-10-23 00:44:32 +0000107 setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
108 AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
109
110 setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
111 AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
112
Tom Stellard68e13282013-07-12 18:14:56 +0000113 setOperationAction(ISD::LOAD, MVT::f64, Promote);
114 AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
115
Tom Stellarda41520c2013-08-14 23:25:00 +0000116 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
117 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
118 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
119 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
Tom Stellard692ee102013-08-01 15:23:42 +0000120
Tom Stellard30d84d82013-08-16 01:12:16 +0000121 setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
122 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
123 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
124 setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
125 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
126 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
127 setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
128 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
129 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
130 setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
131 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
132 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);
133
Tom Stellardd7a472c2013-07-23 01:47:46 +0000134 setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
135 setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
136
Christian Konig45b14e32013-03-27 09:12:51 +0000137 setOperationAction(ISD::MUL, MVT::i64, Expand);
138
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000139 setOperationAction(ISD::UDIV, MVT::i32, Expand);
140 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
141 setOperationAction(ISD::UREM, MVT::i32, Expand);
Tom Stellardf5660aa2013-07-18 21:43:35 +0000142 setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
143 setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000144
Tom Stellard5464a922013-08-21 22:14:17 +0000145 static const MVT::SimpleValueType IntTypes[] = {
146 MVT::v2i32, MVT::v4i32
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000147 };
Tom Stellard0991c312013-08-16 23:51:24 +0000148 const size_t NumIntTypes = array_lengthof(IntTypes);
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000149
Tom Stellard0991c312013-08-16 23:51:24 +0000150 for (unsigned int x = 0; x < NumIntTypes; ++x) {
Tom Stellard5464a922013-08-21 22:14:17 +0000151 MVT::SimpleValueType VT = IntTypes[x];
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000152 //Expand the following operations for the current type by default
153 setOperationAction(ISD::ADD, VT, Expand);
154 setOperationAction(ISD::AND, VT, Expand);
Tom Stellarde3d60ac2013-07-30 14:31:03 +0000155 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
156 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000157 setOperationAction(ISD::MUL, VT, Expand);
158 setOperationAction(ISD::OR, VT, Expand);
159 setOperationAction(ISD::SHL, VT, Expand);
Tom Stellarde3d60ac2013-07-30 14:31:03 +0000160 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000161 setOperationAction(ISD::SRL, VT, Expand);
162 setOperationAction(ISD::SRA, VT, Expand);
163 setOperationAction(ISD::SUB, VT, Expand);
164 setOperationAction(ISD::UDIV, VT, Expand);
Tom Stellarde3d60ac2013-07-30 14:31:03 +0000165 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000166 setOperationAction(ISD::UREM, VT, Expand);
Tom Stellardf5660aa2013-07-18 21:43:35 +0000167 setOperationAction(ISD::VSELECT, VT, Expand);
Aaron Watryf97c7fe2013-06-25 13:55:57 +0000168 setOperationAction(ISD::XOR, VT, Expand);
169 }
Tom Stellard0991c312013-08-16 23:51:24 +0000170
Tom Stellard5464a922013-08-21 22:14:17 +0000171 static const MVT::SimpleValueType FloatTypes[] = {
172 MVT::v2f32, MVT::v4f32
Tom Stellard0991c312013-08-16 23:51:24 +0000173 };
174 const size_t NumFloatTypes = array_lengthof(FloatTypes);
175
176 for (unsigned int x = 0; x < NumFloatTypes; ++x) {
Tom Stellard5464a922013-08-21 22:14:17 +0000177 MVT::SimpleValueType VT = FloatTypes[x];
Tom Stellard0991c312013-08-16 23:51:24 +0000178 setOperationAction(ISD::FADD, VT, Expand);
179 setOperationAction(ISD::FDIV, VT, Expand);
Tom Stellard84c0bd92013-08-16 23:51:29 +0000180 setOperationAction(ISD::FFLOOR, VT, Expand);
Tom Stellard0991c312013-08-16 23:51:24 +0000181 setOperationAction(ISD::FMUL, VT, Expand);
Tom Stellard3cae8232013-08-16 23:51:33 +0000182 setOperationAction(ISD::FRINT, VT, Expand);
Tom Stellard0991c312013-08-16 23:51:24 +0000183 setOperationAction(ISD::FSUB, VT, Expand);
184 }
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000185}
186
Tom Stellard2b272a12013-08-05 22:22:07 +0000187//===----------------------------------------------------------------------===//
188// Target Information
189//===----------------------------------------------------------------------===//
190
191MVT AMDGPUTargetLowering::getVectorIdxTy() const {
192 return MVT::i32;
193}
194
195
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000196//===---------------------------------------------------------------------===//
Tom Stellard1f67c632013-07-23 23:55:03 +0000197// Target Properties
198//===---------------------------------------------------------------------===//
199
200bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
201 assert(VT.isFloatingPoint());
202 return VT == MVT::f32;
203}
204
205bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
206 assert(VT.isFloatingPoint());
207 return VT == MVT::f32;
208}
209
210//===---------------------------------------------------------------------===//
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000211// TargetLowering Callbacks
212//===---------------------------------------------------------------------===//
213
Christian Konig90c64cb2013-03-07 09:03:52 +0000214void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
215 const SmallVectorImpl<ISD::InputArg> &Ins) const {
216
217 State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000218}
219
220SDValue AMDGPUTargetLowering::LowerReturn(
221 SDValue Chain,
222 CallingConv::ID CallConv,
223 bool isVarArg,
224 const SmallVectorImpl<ISD::OutputArg> &Outs,
225 const SmallVectorImpl<SDValue> &OutVals,
Andrew Trickac6d9be2013-05-25 02:42:55 +0000226 SDLoc DL, SelectionDAG &DAG) const {
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000227 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
228}
229
230//===---------------------------------------------------------------------===//
231// Target specific lowering
232//===---------------------------------------------------------------------===//
233
234SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
235 const {
236 switch (Op.getOpcode()) {
237 default:
238 Op.getNode()->dump();
239 assert(0 && "Custom lowering code for this"
240 "instruction is not implemented yet!");
241 break;
242 // AMDIL DAG lowering
243 case ISD::SDIV: return LowerSDIV(Op, DAG);
244 case ISD::SREM: return LowerSREM(Op, DAG);
245 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
246 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
247 // AMDGPU DAG lowering
Tom Stellarda41520c2013-08-14 23:25:00 +0000248 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
249 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000250 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
Tom Stellard7a0282d2013-08-26 15:05:44 +0000251 case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000252 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
253 }
254 return Op;
255}
256
Tom Stellarde3d4cbc2013-06-28 15:47:08 +0000257SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
258 SDValue Op,
259 SelectionDAG &DAG) const {
260
261 const DataLayout *TD = getTargetMachine().getDataLayout();
262 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
Tom Stellardda25cd32013-08-26 15:05:36 +0000263
264 assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
Tom Stellarde3d4cbc2013-06-28 15:47:08 +0000265 // XXX: What does the value of G->getOffset() mean?
266 assert(G->getOffset() == 0 &&
267 "Do not know what to do with an non-zero offset");
268
Tom Stellarde3d4cbc2013-06-28 15:47:08 +0000269 const GlobalValue *GV = G->getGlobal();
Tom Stellarde3d4cbc2013-06-28 15:47:08 +0000270
Tom Stellard470c4512013-09-05 18:37:57 +0000271 unsigned Offset;
272 if (MFI->LocalMemoryObjects.count(GV) == 0) {
273 uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
274 Offset = MFI->LDSSize;
275 MFI->LocalMemoryObjects[GV] = Offset;
276 // XXX: Account for alignment?
277 MFI->LDSSize += Size;
278 } else {
279 Offset = MFI->LocalMemoryObjects[GV];
280 }
Tom Stellarde3d4cbc2013-06-28 15:47:08 +0000281
Tom Stellardda25cd32013-08-26 15:05:36 +0000282 return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
Tom Stellarde3d4cbc2013-06-28 15:47:08 +0000283}
284
Tom Stellarda41520c2013-08-14 23:25:00 +0000285void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
286 SmallVectorImpl<SDValue> &Args,
287 unsigned Start,
288 unsigned Count) const {
289 EVT VT = Op.getValueType();
290 for (unsigned i = Start, e = Start + Count; i != e; ++i) {
291 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
292 VT.getVectorElementType(),
293 Op, DAG.getConstant(i, MVT::i32)));
294 }
295}
296
297SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
298 SelectionDAG &DAG) const {
299 SmallVector<SDValue, 8> Args;
300 SDValue A = Op.getOperand(0);
301 SDValue B = Op.getOperand(1);
302
303 ExtractVectorElements(A, DAG, Args, 0,
304 A.getValueType().getVectorNumElements());
305 ExtractVectorElements(B, DAG, Args, 0,
306 B.getValueType().getVectorNumElements());
307
308 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
309 &Args[0], Args.size());
310}
311
312SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
313 SelectionDAG &DAG) const {
314
315 SmallVector<SDValue, 8> Args;
316 EVT VT = Op.getValueType();
317 unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
318 ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
319 VT.getVectorNumElements());
320
321 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
322 &Args[0], Args.size());
323}
324
325
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000326SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
327 SelectionDAG &DAG) const {
328 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
Andrew Trickac6d9be2013-05-25 02:42:55 +0000329 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000330 EVT VT = Op.getValueType();
331
332 switch (IntrinsicID) {
333 default: return Op;
334 case AMDGPUIntrinsic::AMDIL_abs:
335 return LowerIntrinsicIABS(Op, DAG);
336 case AMDGPUIntrinsic::AMDIL_exp:
337 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
338 case AMDGPUIntrinsic::AMDGPU_lrp:
339 return LowerIntrinsicLRP(Op, DAG);
340 case AMDGPUIntrinsic::AMDIL_fraction:
341 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000342 case AMDGPUIntrinsic::AMDIL_max:
343 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
344 Op.getOperand(2));
345 case AMDGPUIntrinsic::AMDGPU_imax:
346 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
347 Op.getOperand(2));
348 case AMDGPUIntrinsic::AMDGPU_umax:
349 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
350 Op.getOperand(2));
351 case AMDGPUIntrinsic::AMDIL_min:
352 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
353 Op.getOperand(2));
354 case AMDGPUIntrinsic::AMDGPU_imin:
355 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
356 Op.getOperand(2));
357 case AMDGPUIntrinsic::AMDGPU_umin:
358 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
359 Op.getOperand(2));
360 case AMDGPUIntrinsic::AMDIL_round_nearest:
361 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
362 }
363}
364
365///IABS(a) = SMAX(sub(0, a), a)
366SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
367 SelectionDAG &DAG) const {
368
Andrew Trickac6d9be2013-05-25 02:42:55 +0000369 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000370 EVT VT = Op.getValueType();
371 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
372 Op.getOperand(1));
373
374 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
375}
376
377/// Linear Interpolation
378/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
379SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
380 SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000381 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000382 EVT VT = Op.getValueType();
383 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
384 DAG.getConstantFP(1.0f, MVT::f32),
385 Op.getOperand(1));
386 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
387 Op.getOperand(3));
Vincent Lejeunee3111962013-02-18 14:11:28 +0000388 return DAG.getNode(ISD::FADD, DL, VT,
389 DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
390 OneSubAC);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000391}
392
393/// \brief Generate Min/Max node
394SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
395 SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000396 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000397 EVT VT = Op.getValueType();
398
399 SDValue LHS = Op.getOperand(0);
400 SDValue RHS = Op.getOperand(1);
401 SDValue True = Op.getOperand(2);
402 SDValue False = Op.getOperand(3);
403 SDValue CC = Op.getOperand(4);
404
405 if (VT != MVT::f32 ||
406 !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
407 return SDValue();
408 }
409
410 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
411 switch (CCOpcode) {
412 case ISD::SETOEQ:
413 case ISD::SETONE:
414 case ISD::SETUNE:
415 case ISD::SETNE:
416 case ISD::SETUEQ:
417 case ISD::SETEQ:
418 case ISD::SETFALSE:
419 case ISD::SETFALSE2:
420 case ISD::SETTRUE:
421 case ISD::SETTRUE2:
422 case ISD::SETUO:
423 case ISD::SETO:
424 assert(0 && "Operation should already be optimised !");
425 case ISD::SETULE:
426 case ISD::SETULT:
427 case ISD::SETOLE:
428 case ISD::SETOLT:
429 case ISD::SETLE:
430 case ISD::SETLT: {
431 if (LHS == True)
432 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
433 else
434 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
435 }
436 case ISD::SETGT:
437 case ISD::SETGE:
438 case ISD::SETUGE:
439 case ISD::SETOGE:
440 case ISD::SETUGT:
441 case ISD::SETOGT: {
442 if (LHS == True)
443 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
444 else
445 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
446 }
447 case ISD::SETCC_INVALID:
448 assert(0 && "Invalid setcc condcode !");
449 }
450 return Op;
451}
452
Tom Stellardd08a9302013-08-26 15:06:04 +0000453SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
454 SelectionDAG &DAG) const {
455 LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
456 EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
457 EVT EltVT = Op.getValueType().getVectorElementType();
458 EVT PtrVT = Load->getBasePtr().getValueType();
459 unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
460 SmallVector<SDValue, 8> Loads;
461 SDLoc SL(Op);
462
463 for (unsigned i = 0, e = NumElts; i != e; ++i) {
464 SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
465 DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
466 Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
467 Load->getChain(), Ptr,
468 MachinePointerInfo(Load->getMemOperand()->getValue()),
469 MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
470 Load->getAlignment()));
471 }
472 return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
473 Loads.size());
474}
475
Tom Stellard7a0282d2013-08-26 15:05:44 +0000476SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
477 SelectionDAG &DAG) const {
478 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
479 EVT MemVT = Store->getMemoryVT();
480 unsigned MemBits = MemVT.getSizeInBits();
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000481
Tom Stellard7a0282d2013-08-26 15:05:44 +0000482 // Byte stores are really expensive, so if possible, try to pack
483 // 32-bit vector truncatating store into an i32 store.
484 // XXX: We could also handle optimize other vector bitwidths
485 if (!MemVT.isVector() || MemBits > 32) {
486 return SDValue();
487 }
488
489 SDLoc DL(Op);
490 const SDValue &Value = Store->getValue();
491 EVT VT = Value.getValueType();
492 const SDValue &Ptr = Store->getBasePtr();
493 EVT MemEltVT = MemVT.getVectorElementType();
494 unsigned MemEltBits = MemEltVT.getSizeInBits();
495 unsigned MemNumElements = MemVT.getVectorNumElements();
496 EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
497 SDValue Mask;
498 switch(MemEltBits) {
499 case 8:
500 Mask = DAG.getConstant(0xFF, PackedVT);
501 break;
502 case 16:
503 Mask = DAG.getConstant(0xFFFF, PackedVT);
504 break;
505 default:
506 llvm_unreachable("Cannot lower this vector store");
507 }
508 SDValue PackedValue;
509 for (unsigned i = 0; i < MemNumElements; ++i) {
510 EVT ElemVT = VT.getVectorElementType();
511 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
512 DAG.getConstant(i, MVT::i32));
513 Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
514 Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
515 SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
516 Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
517 if (i == 0) {
518 PackedValue = Elt;
519 } else {
520 PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
521 }
522 }
523 return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
524 MachinePointerInfo(Store->getMemOperand()->getValue()),
525 Store->isVolatile(), Store->isNonTemporal(),
526 Store->getAlignment());
527}
528
529SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
530 SelectionDAG &DAG) const {
531 StoreSDNode *Store = cast<StoreSDNode>(Op);
532 EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
533 EVT EltVT = Store->getValue().getValueType().getVectorElementType();
534 EVT PtrVT = Store->getBasePtr().getValueType();
535 unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
536 SDLoc SL(Op);
537
538 SmallVector<SDValue, 8> Chains;
539
540 for (unsigned i = 0, e = NumElts; i != e; ++i) {
541 SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
542 Store->getValue(), DAG.getConstant(i, MVT::i32));
543 SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
544 Store->getBasePtr(),
545 DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
546 PtrVT));
Tom Stellard8e780122013-08-26 15:05:49 +0000547 Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
Tom Stellard7a0282d2013-08-26 15:05:44 +0000548 MachinePointerInfo(Store->getMemOperand()->getValue()),
Tom Stellard8e780122013-08-26 15:05:49 +0000549 MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
Tom Stellard7a0282d2013-08-26 15:05:44 +0000550 Store->getAlignment()));
551 }
552 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
553}
554
555SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
556 SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
557 if (Result.getNode()) {
558 return Result;
559 }
560
561 StoreSDNode *Store = cast<StoreSDNode>(Op);
562 if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
563 Store->getValue().getValueType().isVector()) {
564 return SplitVectorStore(Op, DAG);
565 }
566 return SDValue();
567}
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000568
569SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
570 SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000571 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000572 EVT VT = Op.getValueType();
573
574 SDValue Num = Op.getOperand(0);
575 SDValue Den = Op.getOperand(1);
576
577 SmallVector<SDValue, 8> Results;
578
579 // RCP = URECIP(Den) = 2^32 / Den + e
580 // e is rounding error.
581 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
582
583 // RCP_LO = umulo(RCP, Den) */
584 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
585
586 // RCP_HI = mulhu (RCP, Den) */
587 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
588
589 // NEG_RCP_LO = -RCP_LO
590 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
591 RCP_LO);
592
593 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
594 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
595 NEG_RCP_LO, RCP_LO,
596 ISD::SETEQ);
597 // Calculate the rounding error from the URECIP instruction
598 // E = mulhu(ABS_RCP_LO, RCP)
599 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
600
601 // RCP_A_E = RCP + E
602 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
603
604 // RCP_S_E = RCP - E
605 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
606
607 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
608 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
609 RCP_A_E, RCP_S_E,
610 ISD::SETEQ);
611 // Quotient = mulhu(Tmp0, Num)
612 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
613
614 // Num_S_Remainder = Quotient * Den
615 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
616
617 // Remainder = Num - Num_S_Remainder
618 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
619
620 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
621 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
622 DAG.getConstant(-1, VT),
623 DAG.getConstant(0, VT),
624 ISD::SETGE);
625 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
626 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
627 DAG.getConstant(0, VT),
628 DAG.getConstant(-1, VT),
629 DAG.getConstant(0, VT),
630 ISD::SETGE);
631 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
632 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
633 Remainder_GE_Zero);
634
635 // Calculate Division result:
636
637 // Quotient_A_One = Quotient + 1
638 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
639 DAG.getConstant(1, VT));
640
641 // Quotient_S_One = Quotient - 1
642 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
643 DAG.getConstant(1, VT));
644
645 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
646 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
647 Quotient, Quotient_A_One, ISD::SETEQ);
648
649 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
650 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
651 Quotient_S_One, Div, ISD::SETEQ);
652
653 // Calculate Rem result:
654
655 // Remainder_S_Den = Remainder - Den
656 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
657
658 // Remainder_A_Den = Remainder + Den
659 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
660
661 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
662 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
663 Remainder, Remainder_S_Den, ISD::SETEQ);
664
665 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
666 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
667 Remainder_A_Den, Rem, ISD::SETEQ);
668 SDValue Ops[2];
669 Ops[0] = Div;
670 Ops[1] = Rem;
671 return DAG.getMergeValues(Ops, 2, DL);
672}
673
Tom Stellard4c52d452013-08-16 01:12:11 +0000674
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000675//===----------------------------------------------------------------------===//
676// Helper functions
677//===----------------------------------------------------------------------===//
678
Tom Stellardf95b1622013-10-23 00:44:32 +0000679void AMDGPUTargetLowering::getOriginalFunctionArgs(
680 SelectionDAG &DAG,
681 const Function *F,
682 const SmallVectorImpl<ISD::InputArg> &Ins,
683 SmallVectorImpl<ISD::InputArg> &OrigIns) const {
684
685 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
686 if (Ins[i].ArgVT == Ins[i].VT) {
687 OrigIns.push_back(Ins[i]);
688 continue;
689 }
690
691 EVT VT;
692 if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
693 // Vector has been split into scalars.
694 VT = Ins[i].ArgVT.getVectorElementType();
695 } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
696 Ins[i].ArgVT.getVectorElementType() !=
697 Ins[i].VT.getVectorElementType()) {
698 // Vector elements have been promoted
699 VT = Ins[i].ArgVT;
700 } else {
701 // Vector has been spilt into smaller vectors.
702 VT = Ins[i].VT;
703 }
704
705 ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
706 Ins[i].OrigArgIndex, Ins[i].PartOffset);
707 OrigIns.push_back(Arg);
708 }
709}
710
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000711bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
712 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
713 return CFP->isExactlyValue(1.0);
714 }
715 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
716 return C->isAllOnesValue();
717 }
718 return false;
719}
720
721bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
722 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
723 return CFP->getValueAPF().isZero();
724 }
725 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
726 return C->isNullValue();
727 }
728 return false;
729}
730
731SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
732 const TargetRegisterClass *RC,
733 unsigned Reg, EVT VT) const {
734 MachineFunction &MF = DAG.getMachineFunction();
735 MachineRegisterInfo &MRI = MF.getRegInfo();
736 unsigned VirtualRegister;
737 if (!MRI.isLiveIn(Reg)) {
738 VirtualRegister = MRI.createVirtualRegister(RC);
739 MRI.addLiveIn(Reg, VirtualRegister);
740 } else {
741 VirtualRegister = MRI.getLiveInVirtReg(Reg);
742 }
743 return DAG.getRegister(VirtualRegister, VT);
744}
745
746#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
747
748const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
749 switch (Opcode) {
750 default: return 0;
751 // AMDIL DAG nodes
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000752 NODE_NAME_CASE(CALL);
753 NODE_NAME_CASE(UMUL);
754 NODE_NAME_CASE(DIV_INF);
755 NODE_NAME_CASE(RET_FLAG);
756 NODE_NAME_CASE(BRANCH_COND);
757
758 // AMDGPU DAG nodes
759 NODE_NAME_CASE(DWORDADDR)
760 NODE_NAME_CASE(FRACT)
761 NODE_NAME_CASE(FMAX)
762 NODE_NAME_CASE(SMAX)
763 NODE_NAME_CASE(UMAX)
764 NODE_NAME_CASE(FMIN)
765 NODE_NAME_CASE(SMIN)
766 NODE_NAME_CASE(UMIN)
767 NODE_NAME_CASE(URECIP)
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000768 NODE_NAME_CASE(EXPORT)
Tom Stellardc7e18882013-01-23 02:09:03 +0000769 NODE_NAME_CASE(CONST_ADDRESS)
Tom Stellardc0b0c672013-02-06 17:32:29 +0000770 NODE_NAME_CASE(REGISTER_LOAD)
771 NODE_NAME_CASE(REGISTER_STORE)
Tom Stellard68db37b2013-08-14 23:24:45 +0000772 NODE_NAME_CASE(LOAD_CONSTANT)
773 NODE_NAME_CASE(LOAD_INPUT)
774 NODE_NAME_CASE(SAMPLE)
775 NODE_NAME_CASE(SAMPLEB)
776 NODE_NAME_CASE(SAMPLED)
777 NODE_NAME_CASE(SAMPLEL)
Tom Stellardec484272013-08-16 01:12:06 +0000778 NODE_NAME_CASE(STORE_MSKOR)
Tom Stellarda3c2bcf2013-09-12 02:55:14 +0000779 NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000780 }
781}