//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
14
15#include "AMDGPUSubtarget.h"
Eric Christopherac4b69e2014-07-25 22:22:39 +000016#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000017#include "R600InstrInfo.h"
Matt Arsenaultf59e5382015-11-06 18:23:00 +000018#include "SIFrameLowering.h"
Eric Christopherac4b69e2014-07-25 22:22:39 +000019#include "SIISelLowering.h"
Chandler Carruthd9903882015-01-14 11:23:27 +000020#include "SIInstrInfo.h"
Tom Stellarde99fb652015-01-20 19:33:04 +000021#include "SIMachineFunctionInfo.h"
Matt Arsenaultd9a23ab2014-07-13 02:08:26 +000022#include "llvm/ADT/SmallString.h"
Tom Stellard83f0bce2015-01-29 16:55:25 +000023#include "llvm/CodeGen/MachineScheduler.h"
Matt Arsenaultd9a23ab2014-07-13 02:08:26 +000024
Tom Stellard75aadc22012-12-11 21:25:42 +000025using namespace llvm;
26
Chandler Carruthe96dd892014-04-21 22:55:11 +000027#define DEBUG_TYPE "amdgpu-subtarget"
28
Tom Stellard75aadc22012-12-11 21:25:42 +000029#define GET_SUBTARGETINFO_ENUM
30#define GET_SUBTARGETINFO_TARGET_DESC
31#define GET_SUBTARGETINFO_CTOR
32#include "AMDGPUGenSubtargetInfo.inc"
33
Matt Arsenault43e92fe2016-06-24 06:30:11 +000034AMDGPUSubtarget::~AMDGPUSubtarget() {}
35
Eric Christopherac4b69e2014-07-25 22:22:39 +000036AMDGPUSubtarget &
Daniel Sandersa73f1fd2015-06-10 12:11:26 +000037AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
38 StringRef GPU, StringRef FS) {
Eric Christopherac4b69e2014-07-25 22:22:39 +000039 // Determine default and user-specified characteristics
Matt Arsenaultf171cf22014-07-14 23:40:49 +000040 // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
41 // enabled, but some instructions do not respect them and they run at the
42 // double precision rate, so don't enable by default.
43 //
44 // We want to be able to turn these off, but making this a subtarget feature
45 // for SI has the unhelpful behavior that it unsets everything else if you
46 // disable it.
Matt Arsenaultd9a23ab2014-07-13 02:08:26 +000047
Changpeng Fang71369b32016-05-26 19:35:29 +000048 SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
Changpeng Fangb41574a2015-12-22 20:55:23 +000049 if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
Matt Arsenault7f681ac2016-07-01 23:03:44 +000050 FullFS += "+flat-for-global,+unaligned-buffer-access,";
Matt Arsenaultd9a23ab2014-07-13 02:08:26 +000051 FullFS += FS;
52
53 ParseSubtargetFeatures(GPU, FullFS);
Tom Stellard2e59a452014-06-13 01:32:00 +000054
Eric Christopherac4b69e2014-07-25 22:22:39 +000055 // FIXME: I don't think think Evergreen has any useful support for
56 // denormals, but should be checked. Should we issue a warning somewhere
57 // if someone tries to enable these?
Tom Stellard2e59a452014-06-13 01:32:00 +000058 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
Matt Arsenaultf171cf22014-07-14 23:40:49 +000059 FP32Denormals = false;
60 FP64Denormals = false;
Eric Christopherac4b69e2014-07-25 22:22:39 +000061 }
Matt Arsenault24ee0782016-02-12 02:40:47 +000062
63 // Set defaults if needed.
64 if (MaxPrivateElementSize == 0)
Matt Arsenaulte8ed8e52016-05-11 00:28:54 +000065 MaxPrivateElementSize = 4;
Matt Arsenault24ee0782016-02-12 02:40:47 +000066
Eric Christopherac4b69e2014-07-25 22:22:39 +000067 return *this;
68}
69
Daniel Sandersa73f1fd2015-06-10 12:11:26 +000070AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
Matt Arsenault43e92fe2016-06-24 06:30:11 +000071 const TargetMachine &TM)
72 : AMDGPUGenSubtargetInfo(TT, GPU, FS),
73 TargetTriple(TT),
74 Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
75 IsaVersion(ISAVersion0_0_0),
76 WavefrontSize(64),
77 LocalMemorySize(0),
78 LDSBankCount(0),
79 MaxPrivateElementSize(0),
Tom Stellard40ce8af2015-01-28 16:04:26 +000080
Matt Arsenault43e92fe2016-06-24 06:30:11 +000081 FastFMAF32(false),
82 HalfRate64Ops(false),
83
84 FP32Denormals(false),
85 FP64Denormals(false),
86 FPExceptions(false),
87 FlatForGlobal(false),
Matt Arsenault7f681ac2016-07-01 23:03:44 +000088 UnalignedBufferAccess(false),
89
Matt Arsenault43e92fe2016-06-24 06:30:11 +000090 EnableXNACK(false),
91 DebuggerInsertNops(false),
92 DebuggerReserveRegs(false),
Konstantin Zhuravlyovf2f3d142016-06-25 03:11:28 +000093 DebuggerEmitPrologue(false),
Matt Arsenault43e92fe2016-06-24 06:30:11 +000094
95 EnableVGPRSpilling(false),
Matt Arsenault43e92fe2016-06-24 06:30:11 +000096 EnablePromoteAlloca(false),
Matt Arsenault43e92fe2016-06-24 06:30:11 +000097 EnableLoadStoreOpt(false),
98 EnableUnsafeDSOffsetFolding(false),
99 EnableSIScheduler(false),
100 DumpCode(false),
101
102 FP64(false),
103 IsGCN(false),
104 GCN1Encoding(false),
105 GCN3Encoding(false),
106 CIInsts(false),
107 SGPRInitBug(false),
108 HasSMemRealTime(false),
109 Has16BitInsts(false),
110 FlatAddressSpace(false),
111
112 R600ALUInst(false),
113 CaymanISA(false),
114 CFALUBug(false),
115 HasVertexCache(false),
116 TexVTXClauseSize(0),
117
118 FeatureDisable(false),
Matt Arsenault56684d42016-08-11 17:31:42 +0000119 InstrItins(getInstrItineraryForCPU(GPU)),
120 TSInfo() {
Tom Stellard40ce8af2015-01-28 16:04:26 +0000121 initializeSubtargetDependencies(TT, GPU, FS);
Tom Stellarda40f9712014-01-22 21:55:43 +0000122}
Tom Stellardb8fd6ef2014-12-02 22:00:07 +0000123
Matt Arsenault8a028bf2016-05-16 21:19:59 +0000124// FIXME: These limits are for SI. Did they change with the larger maximum LDS
125// size?
126unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const {
127 switch (NWaves) {
128 case 10:
129 return 1638;
130 case 9:
131 return 1820;
132 case 8:
133 return 2048;
134 case 7:
135 return 2340;
136 case 6:
137 return 2730;
138 case 5:
139 return 3276;
140 case 4:
141 return 4096;
142 case 3:
143 return 5461;
144 case 2:
145 return 8192;
146 default:
147 return getLocalMemorySize();
148 }
149}
150
151unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const {
152 if (Bytes <= 1638)
153 return 10;
154
155 if (Bytes <= 1820)
156 return 9;
157
158 if (Bytes <= 2048)
159 return 8;
160
161 if (Bytes <= 2340)
162 return 7;
163
164 if (Bytes <= 2730)
165 return 6;
166
167 if (Bytes <= 3276)
168 return 5;
169
170 if (Bytes <= 4096)
171 return 4;
172
173 if (Bytes <= 5461)
174 return 3;
175
176 if (Bytes <= 8192)
177 return 2;
178
179 return 1;
180}
181
Konstantin Zhuravlyov1d650262016-09-06 20:22:28 +0000182std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
183 const Function &F) const {
184
185 // Default minimum/maximum flat work group sizes.
186 std::pair<unsigned, unsigned> Default =
187 AMDGPU::isCompute(F.getCallingConv()) ?
188 std::pair<unsigned, unsigned>(getWavefrontSize() * 2,
189 getWavefrontSize() * 4) :
190 std::pair<unsigned, unsigned>(1, getWavefrontSize());
191
192 // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
193 // starts using "amdgpu-flat-work-group-size" attribute.
194 Default.second = AMDGPU::getIntegerAttribute(
195 F, "amdgpu-max-work-group-size", Default.second);
196 Default.first = std::min(Default.first, Default.second);
197
198 // Requested minimum/maximum flat work group sizes.
199 std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
200 F, "amdgpu-flat-work-group-size", Default);
201
202 // Make sure requested minimum is less than requested maximum.
203 if (Requested.first > Requested.second)
204 return Default;
205
206 // Make sure requested values do not violate subtarget's specifications.
207 if (Requested.first < getMinFlatWorkGroupSize())
208 return Default;
209 if (Requested.second > getMaxFlatWorkGroupSize())
210 return Default;
211
212 return Requested;
213}
214
215std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
216 const Function &F) const {
217
218 // Default minimum/maximum number of waves per execution unit.
219 std::pair<unsigned, unsigned> Default(1, 0);
220
221 // Default/requested minimum/maximum flat work group sizes.
222 std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
223
224 // If minimum/maximum flat work group sizes were explicitly requested using
225 // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
226 // number of waves per execution unit to values implied by requested
227 // minimum/maximum flat work group sizes.
228 unsigned MinImpliedByFlatWorkGroupSize =
229 getMaxWavesPerEU(FlatWorkGroupSizes.second);
230 bool RequestedFlatWorkGroupSize = false;
231
232 // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
233 // starts using "amdgpu-flat-work-group-size" attribute.
234 if (F.hasFnAttribute("amdgpu-max-work-group-size") ||
235 F.hasFnAttribute("amdgpu-flat-work-group-size")) {
236 Default.first = MinImpliedByFlatWorkGroupSize;
237 RequestedFlatWorkGroupSize = true;
238 }
239
240 // Requested minimum/maximum number of waves per execution unit.
241 std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
242 F, "amdgpu-waves-per-eu", Default, true);
243
244 // Make sure requested minimum is less than requested maximum.
245 if (Requested.second && Requested.first > Requested.second)
246 return Default;
247
248 // Make sure requested values do not violate subtarget's specifications.
249 if (Requested.first < getMinWavesPerEU() ||
250 Requested.first > getMaxWavesPerEU())
251 return Default;
252 if (Requested.second > getMaxWavesPerEU())
253 return Default;
254
255 // Make sure requested values are compatible with values implied by requested
256 // minimum/maximum flat work group sizes.
257 if (RequestedFlatWorkGroupSize &&
258 Requested.first > MinImpliedByFlatWorkGroupSize)
259 return Default;
260
261 return Requested;
262}
263
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000264R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
265 const TargetMachine &TM) :
266 AMDGPUSubtarget(TT, GPU, FS, TM),
267 InstrInfo(*this),
268 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
269 TLInfo(TM, *this) {}
270
271SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
272 const TargetMachine &TM) :
273 AMDGPUSubtarget(TT, GPU, FS, TM),
274 InstrInfo(*this),
275 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
Matt Arsenaulteb9025d2016-06-28 17:42:09 +0000276 TLInfo(TM, *this),
277 GISel() {}
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000278
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000279void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Matt Arsenault55dff272016-06-28 00:11:26 +0000280 unsigned NumRegionInstrs) const {
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000281 // Track register pressure so the scheduler can try to decrease
282 // pressure once register usage is above the threshold defined by
283 // SIRegisterInfo::getRegPressureSetLimit()
284 Policy.ShouldTrackPressure = true;
Tom Stellard83f0bce2015-01-29 16:55:25 +0000285
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000286 // Enabling both top down and bottom up scheduling seems to give us less
287 // register spills than just using one of these approaches on its own.
288 Policy.OnlyTopDown = false;
289 Policy.OnlyBottomUp = false;
Tom Stellard83f0bce2015-01-29 16:55:25 +0000290
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000291 // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
292 if (!enableSIScheduler())
293 Policy.ShouldTrackLaneMasks = true;
294}
Tom Stellard0bc954e2016-03-30 16:35:09 +0000295
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000296bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
297 return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
298}
Tom Stellard0d23ebe2016-08-29 19:42:52 +0000299
300unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
301 if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
302 if (SGPRs <= 80)
303 return 10;
304 if (SGPRs <= 88)
305 return 9;
306 if (SGPRs <= 100)
307 return 8;
308 return 7;
309 }
310 if (SGPRs <= 48)
311 return 10;
312 if (SGPRs <= 56)
313 return 9;
314 if (SGPRs <= 64)
315 return 8;
316 if (SGPRs <= 72)
317 return 7;
318 if (SGPRs <= 80)
319 return 6;
320 return 5;
321}
322
323unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
324 if (VGPRs <= 24)
325 return 10;
326 if (VGPRs <= 28)
327 return 9;
328 if (VGPRs <= 32)
329 return 8;
330 if (VGPRs <= 36)
331 return 7;
332 if (VGPRs <= 40)
333 return 6;
334 if (VGPRs <= 48)
335 return 5;
336 if (VGPRs <= 64)
337 return 4;
338 if (VGPRs <= 84)
339 return 3;
340 if (VGPRs <= 128)
341 return 2;
342 return 1;
343}