//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This is the parent TargetLowering class for hardware code gen
/// targets.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILIntrinsicInfo.h"
#include "R600MachineFunctionInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"

using namespace llvm;

static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
                          CCValAssign::LocInfo LocInfo,
                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
  unsigned Offset = State.AllocateStack(ValVT.getStoreSize(),
                                        ArgFlags.getOrigAlign());
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));

  return true;
}

#include "AMDGPUGenCallingConv.inc"

AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
  TargetLowering(TM, new TargetLoweringObjectFileELF()) {

  // Initialize target lowering borrowed from AMDIL
  InitAMDILLowering();

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Library functions. These default to Expand, but we have instructions
  // for them.
  setOperationAction(ISD::FCEIL, MVT::f32, Legal);
  setOperationAction(ISD::FEXP2, MVT::f32, Legal);
  setOperationAction(ISD::FPOW, MVT::f32, Legal);
  setOperationAction(ISD::FLOG2, MVT::f32, Legal);
  setOperationAction(ISD::FABS, MVT::f32, Legal);
  setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
  setOperationAction(ISD::FRINT, MVT::f32, Legal);
  setOperationAction(ISD::FROUND, MVT::f32, Legal);
  setOperationAction(ISD::FTRUNC, MVT::f32, Legal);

  // The hardware supports ROTR, but not ROTL
  setOperationAction(ISD::ROTL, MVT::i32, Expand);

  // Lower floating point store/load to integer store/load to reduce the number
  // of patterns in tablegen.
  setOperationAction(ISD::STORE, MVT::f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);

  setOperationAction(ISD::STORE, MVT::v2f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::STORE, MVT::v4f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::STORE, MVT::v8f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);

  setOperationAction(ISD::STORE, MVT::v16f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);

  setOperationAction(ISD::STORE, MVT::f64, Promote);
  AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);

  // Custom lowering of vector stores is required for local address space
  // stores.
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  // XXX: Native v2i32 local address space stores are possible, but not
  // currently implemented.
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);

  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  // XXX: This can be changed to Custom, once ExpandVectorStores can
  // handle 64-bit stores.
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);

  setOperationAction(ISD::LOAD, MVT::f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);

  setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);

  setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);

  setOperationAction(ISD::LOAD, MVT::f64, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);

  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);

  setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);

  setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
  setOperationAction(ISD::FNEG, MVT::v4f32, Expand);

  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::MUL, MVT::i64, Expand);

  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
  setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);

  static const MVT::SimpleValueType IntTypes[] = {
    MVT::v2i32, MVT::v4i32
  };
  const size_t NumIntTypes = array_lengthof(IntTypes);

  for (unsigned int x = 0; x < NumIntTypes; ++x) {
    MVT::SimpleValueType VT = IntTypes[x];
    // Expand the following operations for the current type by default.
    setOperationAction(ISD::ADD, VT, Expand);
    setOperationAction(ISD::AND, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    setOperationAction(ISD::MUL, VT, Expand);
    setOperationAction(ISD::OR, VT, Expand);
    setOperationAction(ISD::SHL, VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SRL, VT, Expand);
    setOperationAction(ISD::SRA, VT, Expand);
    setOperationAction(ISD::SUB, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::VSELECT, VT, Expand);
    setOperationAction(ISD::XOR, VT, Expand);
  }

  static const MVT::SimpleValueType FloatTypes[] = {
    MVT::v2f32, MVT::v4f32
  };
  const size_t NumFloatTypes = array_lengthof(FloatTypes);

  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
    MVT::SimpleValueType VT = FloatTypes[x];
    setOperationAction(ISD::FABS, VT, Expand);
    setOperationAction(ISD::FADD, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Expand);
    setOperationAction(ISD::FFLOOR, VT, Expand);
    setOperationAction(ISD::FTRUNC, VT, Expand);
    setOperationAction(ISD::FMUL, VT, Expand);
    setOperationAction(ISD::FRINT, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
    setOperationAction(ISD::FSUB, VT, Expand);
  }
}

//===----------------------------------------------------------------------===//
// Target Information
//===----------------------------------------------------------------------===//

MVT AMDGPUTargetLowering::getVectorIdxTy() const {
  return MVT::i32;
}

bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
                                                   EVT CastTy) const {
  if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
    return true;

  unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits();
  unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits();

  return ((LScalarSize <= CastScalarSize) ||
          (CastScalarSize >= 32) ||
          (LScalarSize < 32));
}
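// Illustrative reading of the heuristic above (a sketch, not authoritative):
// same-size bitcasts of a load are reported as beneficial unless the cast
// narrows 32-bit or wider elements into sub-dword ones.  For example,
// v4i32 -> v16i8 (LScalarSize = 32, CastScalarSize = 8) fails all three
// clauses and is rejected, while v16i8 -> v4i32 (8 -> 32) is accepted.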

//===---------------------------------------------------------------------===//
// Target Properties
//===---------------------------------------------------------------------===//

bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return VT == MVT::f32;
}

bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return VT == MVT::f32;
}

//===---------------------------------------------------------------------===//
// TargetLowering Callbacks
//===---------------------------------------------------------------------===//

void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
                             const SmallVectorImpl<ISD::InputArg> &Ins) const {

  State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
}

SDValue AMDGPUTargetLowering::LowerReturn(
                                     SDValue Chain,
                                     CallingConv::ID CallConv,
                                     bool isVarArg,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                                     const SmallVectorImpl<SDValue> &OutVals,
                                     SDLoc DL, SelectionDAG &DAG) const {
  return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
}

//===---------------------------------------------------------------------===//
// Target specific lowering
//===---------------------------------------------------------------------===//

SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
    const {
  switch (Op.getOpcode()) {
  default:
    Op.getNode()->dump();
    llvm_unreachable("Custom lowering code for this "
                     "instruction is not implemented yet!");
    break;
  // AMDIL DAG lowering
  case ISD::SDIV: return LowerSDIV(Op, DAG);
  case ISD::SREM: return LowerSREM(Op, DAG);
  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  // AMDGPU DAG lowering
  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
  case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
  }
  return Op;
}

SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
                                                       const GlobalValue *GV,
                                                       const SDValue &InitPtr,
                                                       SDValue Chain,
                                                       SelectionDAG &DAG) const {
  const DataLayout *TD = getTargetMachine().getDataLayout();
  SDLoc DL(InitPtr);
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Init)) {
    EVT VT = EVT::getEVT(CI->getType());
    PointerType *PtrTy = PointerType::get(CI->getType(), 0);
    return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr,
                        MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
                        TD->getPrefTypeAlignment(CI->getType()));
  } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
    EVT VT = EVT::getEVT(CFP->getType());
    PointerType *PtrTy = PointerType::get(CFP->getType(), 0);
    return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, VT), InitPtr,
                        MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
                        TD->getPrefTypeAlignment(CFP->getType()));
  } else if (Init->getType()->isAggregateType()) {
    EVT PtrVT = InitPtr.getValueType();
    unsigned NumElements = Init->getType()->getArrayNumElements();
    SmallVector<SDValue, 8> Chains;
    for (unsigned i = 0; i < NumElements; ++i) {
      SDValue Offset = DAG.getConstant(i * TD->getTypeAllocSize(
          Init->getType()->getArrayElementType()), PtrVT);
      SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
      Chains.push_back(LowerConstantInitializer(Init->getAggregateElement(i),
                                                GV, Ptr, Chain, DAG));
    }
    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
                       Chains.size());
  } else {
    Init->dump();
    llvm_unreachable("Unhandled constant initializer");
  }
}
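// Worked example for LowerConstantInitializer (illustrative only): for a
// constant like [4 x i32] [i32 1, i32 2, i32 3, i32 4], the aggregate branch
// above emits four i32 stores at InitPtr + 0, 4, 8 and 12 (spacing taken from
// getTypeAllocSize of the element type) and joins them with a TokenFactor,
// recursing in the same way for nested aggregates.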

SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
                                                 SDValue Op,
                                                 SelectionDAG &DAG) const {

  const DataLayout *TD = getTargetMachine().getDataLayout();
  GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = G->getGlobal();

  switch (G->getAddressSpace()) {
  default: llvm_unreachable("Global Address lowering not implemented for this "
                            "address space");
  case AMDGPUAS::LOCAL_ADDRESS: {
    // XXX: What does the value of G->getOffset() mean?
    assert(G->getOffset() == 0 &&
           "Do not know what to do with a non-zero offset");

    unsigned Offset;
    if (MFI->LocalMemoryObjects.count(GV) == 0) {
      uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
      Offset = MFI->LDSSize;
      MFI->LocalMemoryObjects[GV] = Offset;
      // XXX: Account for alignment?
      MFI->LDSSize += Size;
    } else {
      Offset = MFI->LocalMemoryObjects[GV];
    }

    return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
  }
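  // Illustrative sketch of the bump allocation above (not part of the
  // lowering): a first 16-byte local global gets offset 0 and raises LDSSize
  // to 16, a following 8-byte global gets offset 16 and raises LDSSize to 24,
  // and later references to the same GlobalValue reuse the offset cached in
  // LocalMemoryObjects.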
  case AMDGPUAS::CONSTANT_ADDRESS: {
    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
    Type *EltType = GV->getType()->getElementType();
    unsigned Size = TD->getTypeAllocSize(EltType);
    unsigned Alignment = TD->getPrefTypeAlignment(EltType);

    const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV);
    const Constant *Init = Var->getInitializer();
    int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
    SDValue InitPtr = DAG.getFrameIndex(FI,
                                      getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
    SmallVector<SDNode*, 8> WorkList;

    for (SDNode::use_iterator I = DAG.getEntryNode()->use_begin(),
                              E = DAG.getEntryNode()->use_end(); I != E; ++I) {
      if (I->getOpcode() != AMDGPUISD::REGISTER_LOAD && I->getOpcode() != ISD::LOAD)
        continue;
      WorkList.push_back(*I);
    }
    SDValue Chain = LowerConstantInitializer(Init, GV, InitPtr, DAG.getEntryNode(), DAG);
    for (SmallVector<SDNode*, 8>::iterator I = WorkList.begin(),
                                           E = WorkList.end(); I != E; ++I) {
      SmallVector<SDValue, 8> Ops;
      Ops.push_back(Chain);
      for (unsigned i = 1; i < (*I)->getNumOperands(); ++i) {
        Ops.push_back((*I)->getOperand(i));
      }
      DAG.UpdateNodeOperands(*I, &Ops[0], Ops.size());
    }
    return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op),
                              getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
  }
  }
}

void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
                                                 SmallVectorImpl<SDValue> &Args,
                                                 unsigned Start,
                                                 unsigned Count) const {
  EVT VT = Op.getValueType();
  for (unsigned i = Start, e = Start + Count; i != e; ++i) {
    Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
                               VT.getVectorElementType(),
                               Op, DAG.getConstant(i, MVT::i32)));
  }
}

SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SmallVector<SDValue, 8> Args;
  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);

  ExtractVectorElements(A, DAG, Args, 0,
                        A.getValueType().getVectorNumElements());
  ExtractVectorElements(B, DAG, Args, 0,
                        B.getValueType().getVectorNumElements());

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
                     &Args[0], Args.size());
}

SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {

  SmallVector<SDValue, 8> Args;
  EVT VT = Op.getValueType();
  unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
                        VT.getVectorNumElements());

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
                     &Args[0], Args.size());
}

SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
                                              SelectionDAG &DAG) const {

  MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
   static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());

  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
  assert(FIN);

  unsigned FrameIndex = FIN->getIndex();
  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
                         Op.getValueType());
}
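// A plausible reading of the scaling above (an assumption, not verified for
// every subtarget): getFrameIndexOffset returns an offset in stack slots and
// each slot spans getStackWidth() 32-bit registers, so multiplying by
// 4 * StackWidth converts the slot offset into bytes (e.g. slot 3 with a
// stack width of 1 becomes byte offset 12).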

SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const {
  unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  switch (IntrinsicID) {
  default: return Op;
  case AMDGPUIntrinsic::AMDIL_abs:
    return LowerIntrinsicIABS(Op, DAG);
  case AMDGPUIntrinsic::AMDIL_exp:
    return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
  case AMDGPUIntrinsic::AMDGPU_lrp:
    return LowerIntrinsicLRP(Op, DAG);
  case AMDGPUIntrinsic::AMDIL_fraction:
    return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
  case AMDGPUIntrinsic::AMDIL_max:
    return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
                       Op.getOperand(2));
  case AMDGPUIntrinsic::AMDGPU_imax:
    return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
                       Op.getOperand(2));
  case AMDGPUIntrinsic::AMDGPU_umax:
    return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
                       Op.getOperand(2));
  case AMDGPUIntrinsic::AMDIL_min:
    return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
                       Op.getOperand(2));
  case AMDGPUIntrinsic::AMDGPU_imin:
    return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
                       Op.getOperand(2));
  case AMDGPUIntrinsic::AMDGPU_umin:
    return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
                       Op.getOperand(2));
  case AMDGPUIntrinsic::AMDIL_round_nearest:
    return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
  }
}

/// IABS(a) = SMAX(sub(0, a), a)
SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
                                                 SelectionDAG &DAG) const {

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
                            Op.getOperand(1));

  return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
}

/// Linear Interpolation
/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
                                DAG.getConstantFP(1.0f, MVT::f32),
                                Op.getOperand(1));
  SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
                                 Op.getOperand(3));
  return DAG.getNode(ISD::FADD, DL, VT,
      DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
      OneSubAC);
}
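// Worked example of the LRP formula above (illustrative only):
//   LRP(0.25, 8.0, 4.0) = 0.25 * 8.0 + (1 - 0.25) * 4.0 = 2.0 + 3.0 = 5.0
// which is exactly what the FSUB/FMUL/FADD chain computes.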

/// \brief Generate Min/Max node
SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);

  if (VT != MVT::f32 ||
      !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
    return SDValue();
  }

  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  switch (CCOpcode) {
  case ISD::SETOEQ:
  case ISD::SETONE:
  case ISD::SETUNE:
  case ISD::SETNE:
  case ISD::SETUEQ:
  case ISD::SETEQ:
  case ISD::SETFALSE:
  case ISD::SETFALSE2:
  case ISD::SETTRUE:
  case ISD::SETTRUE2:
  case ISD::SETUO:
  case ISD::SETO:
    llvm_unreachable("Operation should already be optimised!");
  case ISD::SETULE:
  case ISD::SETULT:
  case ISD::SETOLE:
  case ISD::SETOLT:
  case ISD::SETLE:
  case ISD::SETLT: {
    if (LHS == True)
      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
    else
      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
  }
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGE:
  case ISD::SETOGE:
  case ISD::SETUGT:
  case ISD::SETOGT: {
    if (LHS == True)
      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
    else
      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
  }
  case ISD::SETCC_INVALID:
    llvm_unreachable("Invalid setcc condcode!");
  }
  return Op;
}
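// Illustrative pattern for the switch above (a sketch, not authoritative):
//   select_cc a, b, a, b, setlt  -->  FMIN(a, b)
//   select_cc a, b, a, b, setgt  -->  FMAX(a, b)
// i.e. a select_cc whose compare operands are also its select values folds to
// a min or max node; anything that is not an f32 min/max pattern returns
// SDValue().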

SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
                                              SelectionDAG &DAG) const {
  LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
  EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
  EVT EltVT = Op.getValueType().getVectorElementType();
  EVT PtrVT = Load->getBasePtr().getValueType();
  unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
  SmallVector<SDValue, 8> Loads;
  SDLoc SL(Op);

  for (unsigned i = 0, e = NumElts; i != e; ++i) {
    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
                    DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
    Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
                    Load->getChain(), Ptr,
                    MachinePointerInfo(Load->getMemOperand()->getValue()),
                    MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
                    Load->getAlignment()));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
                     Loads.size());
}

SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
                                               SelectionDAG &DAG) const {
  StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
  EVT MemVT = Store->getMemoryVT();
  unsigned MemBits = MemVT.getSizeInBits();

  // Byte stores are really expensive, so if possible, try to pack a
  // 32-bit vector truncating store into an i32 store.
  // XXX: We could also optimize other vector bitwidths.
  if (!MemVT.isVector() || MemBits > 32) {
    return SDValue();
  }

  SDLoc DL(Op);
  const SDValue &Value = Store->getValue();
  EVT VT = Value.getValueType();
  const SDValue &Ptr = Store->getBasePtr();
  EVT MemEltVT = MemVT.getVectorElementType();
  unsigned MemEltBits = MemEltVT.getSizeInBits();
  unsigned MemNumElements = MemVT.getVectorNumElements();
  EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
  SDValue Mask;
  switch (MemEltBits) {
  case 8:
    Mask = DAG.getConstant(0xFF, PackedVT);
    break;
  case 16:
    Mask = DAG.getConstant(0xFFFF, PackedVT);
    break;
  default:
    llvm_unreachable("Cannot lower this vector store");
  }
  SDValue PackedValue;
  for (unsigned i = 0; i < MemNumElements; ++i) {
    EVT ElemVT = VT.getVectorElementType();
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
                              DAG.getConstant(i, MVT::i32));
    Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
    Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
    SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
    Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
    if (i == 0) {
      PackedValue = Elt;
    } else {
      PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
    }
  }
  return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
                      MachinePointerInfo(Store->getMemOperand()->getValue()),
                      Store->isVolatile(), Store->isNonTemporal(),
                      Store->getAlignment());
}
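// Worked example of the packing above (illustrative only): storing
// v4i8 <a, b, c, d> becomes a single i32 store of
//   (a & 0xff) | ((b & 0xff) << 8) | ((c & 0xff) << 16) | ((d & 0xff) << 24)
// i.e. element i is masked to MemEltBits and shifted left by MemEltBits * i.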

SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
                                               SelectionDAG &DAG) const {
  StoreSDNode *Store = cast<StoreSDNode>(Op);
  EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
  EVT EltVT = Store->getValue().getValueType().getVectorElementType();
  EVT PtrVT = Store->getBasePtr().getValueType();
  unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
  SDLoc SL(Op);

  SmallVector<SDValue, 8> Chains;

  for (unsigned i = 0, e = NumElts; i != e; ++i) {
    SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
                              Store->getValue(), DAG.getConstant(i, MVT::i32));
    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
                              Store->getBasePtr(),
                              DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
                                              PtrVT));
    Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
                      MachinePointerInfo(Store->getMemOperand()->getValue()),
                      MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
                      Store->getAlignment()));
  }
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
}

SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();

  // Lower loads of constant address space global variables.
  if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(GetUnderlyingObject(Load->getPointerInfo().V))) {

    SDValue Ptr = DAG.getZExtOrTrunc(Load->getBasePtr(), DL,
                                     getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(2, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
                       Load->getChain(), Ptr,
                       DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
  }

  if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS ||
      ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
    return SDValue();

  EVT VT = Op.getValueType();
  EVT MemVT = Load->getMemoryVT();
  unsigned Mask = 0;
  if (Load->getMemoryVT() == MVT::i8) {
    Mask = 0xff;
  } else if (Load->getMemoryVT() == MVT::i16) {
    Mask = 0xffff;
  }
  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
                            DAG.getConstant(2, MVT::i32));
  SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
                            Load->getChain(), Ptr,
                            DAG.getTargetConstant(0, MVT::i32),
                            Op.getOperand(2));
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                Load->getBasePtr(),
                                DAG.getConstant(0x3, MVT::i32));
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, MVT::i32));
  Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
  Ret = DAG.getNode(ISD::AND, DL, MVT::i32, Ret,
                    DAG.getConstant(Mask, MVT::i32));
  if (ExtType == ISD::SEXTLOAD) {
    SDValue SExtShift = DAG.getConstant(
        VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    Ret = DAG.getNode(ISD::SHL, DL, MVT::i32, Ret, SExtShift);
    Ret = DAG.getNode(ISD::SRA, DL, MVT::i32, Ret, SExtShift);
  }

  return Ret;
}
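// Sketch of the sub-dword private load above (illustrative only): for an
// extending i8 load, the dword holding the byte is fetched with REGISTER_LOAD
// from BasePtr >> 2, the byte is selected as
//   (Ret >> ((BasePtr & 3) * 8)) & 0xff
// and a SEXTLOAD is finished with the shl/sra pair, which sign-extends from
// the memory width up to the result width.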

SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  StoreSDNode *Store = cast<StoreSDNode>(Op);
  SDValue Chain = Store->getChain();
  if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      Store->getValue().getValueType().isVector()) {
    return SplitVectorStore(Op, DAG);
  }

  if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS &&
      Store->getMemoryVT().bitsLT(MVT::i32)) {
    unsigned Mask = 0;
    if (Store->getMemoryVT() == MVT::i8) {
      Mask = 0xff;
    } else if (Store->getMemoryVT() == MVT::i16) {
      Mask = 0xffff;
    }
    SDValue TruncPtr = DAG.getZExtOrTrunc(Store->getBasePtr(), DL, MVT::i32);
    SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
                              DAG.getConstant(2, MVT::i32));
    SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
                              Chain, Ptr, DAG.getTargetConstant(0, MVT::i32));
    SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, TruncPtr,
                                  DAG.getConstant(0x3, MVT::i32));
    SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                   DAG.getConstant(3, MVT::i32));
    SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                    Store->getValue());
    SDValue MaskedValue = DAG.getNode(ISD::AND, DL, MVT::i32, SExtValue,
                                      DAG.getConstant(Mask, MVT::i32));
    SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                       MaskedValue, ShiftAmt);
    SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                  DAG.getConstant(Mask, MVT::i32), ShiftAmt);
    DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
                          DAG.getConstant(0xffffffff, MVT::i32));
    Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

    SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
    return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                       Chain, Value, Ptr, DAG.getTargetConstant(0, MVT::i32));
  }
  return SDValue();
}
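// Sketch of the read-modify-write above (illustrative only): a private i8/i16
// store loads the containing dword, clears the destination bits with
//   Dst & ~(Mask << ShiftAmt)
// merges in the new value with
//   | ((Value & Mask) << ShiftAmt)
// and writes the dword back with REGISTER_STORE.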

SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Num = Op.getOperand(0);
  SDValue Den = Op.getOperand(1);

  SmallVector<SDValue, 8> Results;

  // RCP = URECIP(Den) = 2^32 / Den + e
  // e is rounding error.
  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);

  // RCP_LO = umulo(RCP, Den)
  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);

  // RCP_HI = mulhu(RCP, Den)
  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);

  // NEG_RCP_LO = -RCP_LO
  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
                                   RCP_LO);

  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                       NEG_RCP_LO, RCP_LO,
                                       ISD::SETEQ);
  // Calculate the rounding error from the URECIP instruction:
  // E = mulhu(ABS_RCP_LO, RCP)
  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);

  // RCP_A_E = RCP + E
  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);

  // RCP_S_E = RCP - E
  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);

  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                 RCP_A_E, RCP_S_E,
                                 ISD::SETEQ);
  // Quotient = mulhu(Tmp0, Num)
  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);

  // Num_S_Remainder = Quotient * Den
  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);

  // Remainder = Num - Num_S_Remainder
  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);

  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
                                             DAG.getConstant(-1, VT),
                                             DAG.getConstant(0, VT),
                                             ISD::SETUGE);
  // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
                                              Num_S_Remainder,
                                              DAG.getConstant(-1, VT),
                                              DAG.getConstant(0, VT),
                                              ISD::SETUGE);
  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
                             Remainder_GE_Zero);

  // Calculate Division result:

  // Quotient_A_One = Quotient + 1
  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
                                       DAG.getConstant(1, VT));

  // Quotient_S_One = Quotient - 1
  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
                                       DAG.getConstant(1, VT));

  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                Quotient, Quotient_A_One, ISD::SETEQ);

  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                        Quotient_S_One, Div, ISD::SETEQ);

  // Calculate Rem result:

  // Remainder_S_Den = Remainder - Den
  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);

  // Remainder_A_Den = Remainder + Den
  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);

  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                Remainder, Remainder_S_Den, ISD::SETEQ);

  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                        Remainder_A_Den, Rem, ISD::SETEQ);
  SDValue Ops[2];
  Ops[0] = Div;
  Ops[1] = Rem;
  return DAG.getMergeValues(Ops, 2, DL);
}
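// Note on the sequence above (a sketch, not a formal argument): URECIP
// approximates 2^32 / Den, mulhu(Tmp0, Num) then gives a quotient estimate,
// and the Remainder_GE_Den / Remainder_GE_Zero selects nudge that estimate up
// or down by one and patch the remainder to match.  For example, 100 udiv 7
// ends up with Div = 14 and Rem = 2.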

SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDValue S0 = Op.getOperand(0);
  SDLoc DL(Op);
  if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64)
    return SDValue();

  // f32 uint_to_fp i64
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
                           DAG.getConstant(0, MVT::i32));
  SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
                           DAG.getConstant(1, MVT::i32));
  SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
  FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
                        DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
  return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
}
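// Note on the conversion above: a 64-bit unsigned value x splits as
// x = Hi * 2^32 + Lo, so
//   (float)x ~= (float)Lo + (float)Hi * 4294967296.0f
// which is exactly the FADD/FMUL sequence emitted, up to f32 rounding.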

//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//

void AMDGPUTargetLowering::getOriginalFunctionArgs(
                               SelectionDAG &DAG,
                               const Function *F,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               SmallVectorImpl<ISD::InputArg> &OrigIns) const {

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    if (Ins[i].ArgVT == Ins[i].VT) {
      OrigIns.push_back(Ins[i]);
      continue;
    }

    EVT VT;
    if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
      // Vector has been split into scalars.
      VT = Ins[i].ArgVT.getVectorElementType();
    } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
               Ins[i].ArgVT.getVectorElementType() !=
               Ins[i].VT.getVectorElementType()) {
      // Vector elements have been promoted.
      VT = Ins[i].ArgVT;
    } else {
      // Vector has been split into smaller vectors.
      VT = Ins[i].VT;
    }

    ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
                      Ins[i].OrigArgIndex, Ins[i].PartOffset);
    OrigIns.push_back(Arg);
  }
}
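// Illustrative example for getOriginalFunctionArgs above (not authoritative):
// a <4 x i16> argument that arrives as four scalar i16 pieces
// (ArgVT = v4i16, VT = i16) takes the first branch and records i16; one that
// was promoted to <4 x i32> (ArgVT = v4i16, VT = v4i32) takes the second
// branch and keeps the original v4i16; a split such as v8i32 arriving as
// v4i32 pieces falls through and keeps the piece type VT.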

bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
    return C->isAllOnesValue();
  }
  return false;
}

bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
    return C->isNullValue();
  }
  return false;
}

SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
                                                  const TargetRegisterClass *RC,
                                                  unsigned Reg, EVT VT) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned VirtualRegister;
  if (!MRI.isLiveIn(Reg)) {
    VirtualRegister = MRI.createVirtualRegister(RC);
    MRI.addLiveIn(Reg, VirtualRegister);
  } else {
    VirtualRegister = MRI.getLiveInVirtReg(Reg);
  }
  return DAG.getRegister(VirtualRegister, VT);
}

#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;

const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  // AMDIL DAG nodes
  NODE_NAME_CASE(CALL);
  NODE_NAME_CASE(UMUL);
  NODE_NAME_CASE(DIV_INF);
  NODE_NAME_CASE(RET_FLAG);
  NODE_NAME_CASE(BRANCH_COND);

  // AMDGPU DAG nodes
  NODE_NAME_CASE(DWORDADDR)
  NODE_NAME_CASE(FRACT)
  NODE_NAME_CASE(FMAX)
  NODE_NAME_CASE(SMAX)
  NODE_NAME_CASE(UMAX)
  NODE_NAME_CASE(FMIN)
  NODE_NAME_CASE(SMIN)
  NODE_NAME_CASE(UMIN)
  NODE_NAME_CASE(URECIP)
  NODE_NAME_CASE(EXPORT)
  NODE_NAME_CASE(CONST_ADDRESS)
  NODE_NAME_CASE(REGISTER_LOAD)
  NODE_NAME_CASE(REGISTER_STORE)
  NODE_NAME_CASE(LOAD_CONSTANT)
  NODE_NAME_CASE(LOAD_INPUT)
  NODE_NAME_CASE(SAMPLE)
  NODE_NAME_CASE(SAMPLEB)
  NODE_NAME_CASE(SAMPLED)
  NODE_NAME_CASE(SAMPLEL)
  NODE_NAME_CASE(STORE_MSKOR)
  NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
  }
}