blob: 44f59c33125a24f0373d358c6fc45fd161eb0be2 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUSubtarget.h"
Eric Christopherac4b69e2014-07-25 22:22:39 +000016#include "R600ISelLowering.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000017#include "R600InstrInfo.h"
Matt Arsenaultf59e5382015-11-06 18:23:00 +000018#include "SIFrameLowering.h"
Eric Christopherac4b69e2014-07-25 22:22:39 +000019#include "SIISelLowering.h"
Chandler Carruthd9903882015-01-14 11:23:27 +000020#include "SIInstrInfo.h"
Tom Stellarde99fb652015-01-20 19:33:04 +000021#include "SIMachineFunctionInfo.h"
Matt Arsenaultd9a23ab2014-07-13 02:08:26 +000022#include "llvm/ADT/SmallString.h"
Tom Stellard83f0bce2015-01-29 16:55:25 +000023#include "llvm/CodeGen/MachineScheduler.h"
Matt Arsenaultd9a23ab2014-07-13 02:08:26 +000024
Tom Stellard75aadc22012-12-11 21:25:42 +000025using namespace llvm;
26
Chandler Carruthe96dd892014-04-21 22:55:11 +000027#define DEBUG_TYPE "amdgpu-subtarget"
28
Tom Stellard75aadc22012-12-11 21:25:42 +000029#define GET_SUBTARGETINFO_ENUM
30#define GET_SUBTARGETINFO_TARGET_DESC
31#define GET_SUBTARGETINFO_CTOR
32#include "AMDGPUGenSubtargetInfo.inc"
33
Matt Arsenault43e92fe2016-06-24 06:30:11 +000034AMDGPUSubtarget::~AMDGPUSubtarget() {}
35
Eric Christopherac4b69e2014-07-25 22:22:39 +000036AMDGPUSubtarget &
Daniel Sandersa73f1fd2015-06-10 12:11:26 +000037AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
38 StringRef GPU, StringRef FS) {
Eric Christopherac4b69e2014-07-25 22:22:39 +000039 // Determine default and user-specified characteristics
Matt Arsenaultf171cf22014-07-14 23:40:49 +000040 // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
41 // enabled, but some instructions do not respect them and they run at the
42 // double precision rate, so don't enable by default.
43 //
44 // We want to be able to turn these off, but making this a subtarget feature
45 // for SI has the unhelpful behavior that it unsets everything else if you
46 // disable it.
Matt Arsenaultd9a23ab2014-07-13 02:08:26 +000047
Changpeng Fang71369b32016-05-26 19:35:29 +000048 SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
Changpeng Fangb41574a2015-12-22 20:55:23 +000049 if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
Matt Arsenault7f681ac2016-07-01 23:03:44 +000050 FullFS += "+flat-for-global,+unaligned-buffer-access,";
Matt Arsenaultd9a23ab2014-07-13 02:08:26 +000051 FullFS += FS;
52
53 ParseSubtargetFeatures(GPU, FullFS);
Tom Stellard2e59a452014-06-13 01:32:00 +000054
Eric Christopherac4b69e2014-07-25 22:22:39 +000055 // FIXME: I don't think think Evergreen has any useful support for
56 // denormals, but should be checked. Should we issue a warning somewhere
57 // if someone tries to enable these?
Tom Stellard2e59a452014-06-13 01:32:00 +000058 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
Matt Arsenaultf171cf22014-07-14 23:40:49 +000059 FP32Denormals = false;
60 FP64Denormals = false;
Eric Christopherac4b69e2014-07-25 22:22:39 +000061 }
Matt Arsenault24ee0782016-02-12 02:40:47 +000062
63 // Set defaults if needed.
64 if (MaxPrivateElementSize == 0)
Matt Arsenaulte8ed8e52016-05-11 00:28:54 +000065 MaxPrivateElementSize = 4;
Matt Arsenault24ee0782016-02-12 02:40:47 +000066
Eric Christopherac4b69e2014-07-25 22:22:39 +000067 return *this;
68}
69
Daniel Sandersa73f1fd2015-06-10 12:11:26 +000070AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
Matt Arsenault43e92fe2016-06-24 06:30:11 +000071 const TargetMachine &TM)
72 : AMDGPUGenSubtargetInfo(TT, GPU, FS),
73 TargetTriple(TT),
74 Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
75 IsaVersion(ISAVersion0_0_0),
76 WavefrontSize(64),
77 LocalMemorySize(0),
78 LDSBankCount(0),
79 MaxPrivateElementSize(0),
Tom Stellard40ce8af2015-01-28 16:04:26 +000080
Matt Arsenault43e92fe2016-06-24 06:30:11 +000081 FastFMAF32(false),
82 HalfRate64Ops(false),
83
84 FP32Denormals(false),
85 FP64Denormals(false),
86 FPExceptions(false),
87 FlatForGlobal(false),
Matt Arsenault7f681ac2016-07-01 23:03:44 +000088 UnalignedBufferAccess(false),
89
Matt Arsenault43e92fe2016-06-24 06:30:11 +000090 EnableXNACK(false),
91 DebuggerInsertNops(false),
92 DebuggerReserveRegs(false),
Konstantin Zhuravlyovf2f3d142016-06-25 03:11:28 +000093 DebuggerEmitPrologue(false),
Matt Arsenault43e92fe2016-06-24 06:30:11 +000094
95 EnableVGPRSpilling(false),
Matt Arsenault43e92fe2016-06-24 06:30:11 +000096 EnablePromoteAlloca(false),
Matt Arsenault43e92fe2016-06-24 06:30:11 +000097 EnableLoadStoreOpt(false),
98 EnableUnsafeDSOffsetFolding(false),
99 EnableSIScheduler(false),
100 DumpCode(false),
101
102 FP64(false),
103 IsGCN(false),
104 GCN1Encoding(false),
105 GCN3Encoding(false),
106 CIInsts(false),
107 SGPRInitBug(false),
108 HasSMemRealTime(false),
109 Has16BitInsts(false),
110 FlatAddressSpace(false),
111
112 R600ALUInst(false),
113 CaymanISA(false),
114 CFALUBug(false),
115 HasVertexCache(false),
116 TexVTXClauseSize(0),
117
118 FeatureDisable(false),
Matt Arsenault56684d42016-08-11 17:31:42 +0000119 InstrItins(getInstrItineraryForCPU(GPU)),
120 TSInfo() {
Tom Stellard40ce8af2015-01-28 16:04:26 +0000121 initializeSubtargetDependencies(TT, GPU, FS);
Tom Stellarda40f9712014-01-22 21:55:43 +0000122}
Tom Stellardb8fd6ef2014-12-02 22:00:07 +0000123
Matt Arsenault8a028bf2016-05-16 21:19:59 +0000124// FIXME: These limits are for SI. Did they change with the larger maximum LDS
125// size?
126unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const {
127 switch (NWaves) {
128 case 10:
129 return 1638;
130 case 9:
131 return 1820;
132 case 8:
133 return 2048;
134 case 7:
135 return 2340;
136 case 6:
137 return 2730;
138 case 5:
139 return 3276;
140 case 4:
141 return 4096;
142 case 3:
143 return 5461;
144 case 2:
145 return 8192;
146 default:
147 return getLocalMemorySize();
148 }
149}
150
151unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const {
152 if (Bytes <= 1638)
153 return 10;
154
155 if (Bytes <= 1820)
156 return 9;
157
158 if (Bytes <= 2048)
159 return 8;
160
161 if (Bytes <= 2340)
162 return 7;
163
164 if (Bytes <= 2730)
165 return 6;
166
167 if (Bytes <= 3276)
168 return 5;
169
170 if (Bytes <= 4096)
171 return 4;
172
173 if (Bytes <= 5461)
174 return 3;
175
176 if (Bytes <= 8192)
177 return 2;
178
179 return 1;
180}
181
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000182R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
183 const TargetMachine &TM) :
184 AMDGPUSubtarget(TT, GPU, FS, TM),
185 InstrInfo(*this),
186 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
187 TLInfo(TM, *this) {}
188
189SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
190 const TargetMachine &TM) :
191 AMDGPUSubtarget(TT, GPU, FS, TM),
192 InstrInfo(*this),
193 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
Matt Arsenaulteb9025d2016-06-28 17:42:09 +0000194 TLInfo(TM, *this),
195 GISel() {}
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000196
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000197void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Matt Arsenault55dff272016-06-28 00:11:26 +0000198 unsigned NumRegionInstrs) const {
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000199 // Track register pressure so the scheduler can try to decrease
200 // pressure once register usage is above the threshold defined by
201 // SIRegisterInfo::getRegPressureSetLimit()
202 Policy.ShouldTrackPressure = true;
Tom Stellard83f0bce2015-01-29 16:55:25 +0000203
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000204 // Enabling both top down and bottom up scheduling seems to give us less
205 // register spills than just using one of these approaches on its own.
206 Policy.OnlyTopDown = false;
207 Policy.OnlyBottomUp = false;
Tom Stellard83f0bce2015-01-29 16:55:25 +0000208
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000209 // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
210 if (!enableSIScheduler())
211 Policy.ShouldTrackLaneMasks = true;
212}
Tom Stellard0bc954e2016-03-30 16:35:09 +0000213
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000214bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
215 return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
216}