blob: aa3034509d192ae9ef7d7a69d5f83704d047074c [file] [log] [blame]
Tom Stellard45bb48e2015-06-13 03:28:10 +00001//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief The AMDGPU target machine contains all of the hardware specific
12/// information needed to emit code for R600 and SI GPUs.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPUTargetMachine.h"
17#include "AMDGPU.h"
Matt Arsenaulteb9025d2016-06-28 17:42:09 +000018#include "AMDGPUCallLowering.h"
19#include "AMDGPUTargetObjectFile.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000020#include "AMDGPUTargetTransformInfo.h"
Tom Stellard0d23ebe2016-08-29 19:42:52 +000021#include "GCNSchedStrategy.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000022#include "R600MachineScheduler.h"
Matt Arsenault2ffe8fd2016-08-11 19:18:50 +000023#include "SIMachineScheduler.h"
Eugene Zelenko6a9226d2016-12-12 22:23:53 +000024#include "llvm/ADT/SmallString.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/StringRef.h"
27#include "llvm/ADT/Triple.h"
28#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
Tom Stellard000c5af2016-04-14 19:09:28 +000029#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
Eugene Zelenko6a9226d2016-12-12 22:23:53 +000030#include "llvm/CodeGen/MachineScheduler.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000031#include "llvm/CodeGen/Passes.h"
Matthias Braun31d19d42016-05-10 03:21:59 +000032#include "llvm/CodeGen/TargetPassConfig.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000033#include "llvm/Support/TargetRegistry.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000034#include "llvm/Transforms/IPO.h"
Chandler Carruth67fc52f2016-08-17 02:56:20 +000035#include "llvm/Transforms/IPO/AlwaysInliner.h"
Stanislav Mekhanoshin81598112017-01-26 16:49:08 +000036#include "llvm/Transforms/IPO/PassManagerBuilder.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000037#include "llvm/Transforms/Scalar.h"
Matt Arsenaultf42c6922016-06-15 00:11:01 +000038#include "llvm/Transforms/Scalar/GVN.h"
Matt Arsenault908b9e22016-07-01 03:33:52 +000039#include "llvm/Transforms/Vectorize.h"
Eugene Zelenko6a9226d2016-12-12 22:23:53 +000040#include "llvm/IR/Attributes.h"
41#include "llvm/IR/Function.h"
Stanislav Mekhanoshin50ea93a2016-12-08 19:46:04 +000042#include "llvm/IR/LegacyPassManager.h"
Eugene Zelenko6a9226d2016-12-12 22:23:53 +000043#include "llvm/Pass.h"
44#include "llvm/Support/CommandLine.h"
45#include "llvm/Support/Compiler.h"
46#include "llvm/Target/TargetLoweringObjectFile.h"
47#include <memory>
Tom Stellard45bb48e2015-06-13 03:28:10 +000048
49using namespace llvm;
50
Matt Arsenaultc5816112016-06-24 06:30:22 +000051static cl::opt<bool> EnableR600StructurizeCFG(
52 "r600-ir-structurize",
53 cl::desc("Use StructurizeCFG IR pass"),
54 cl::init(true));
55
Matt Arsenault03d85842016-06-27 20:32:13 +000056static cl::opt<bool> EnableSROA(
57 "amdgpu-sroa",
58 cl::desc("Run SROA after promote alloca pass"),
59 cl::ReallyHidden,
60 cl::init(true));
61
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +000062static cl::opt<bool>
63EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
64 cl::desc("Run early if-conversion"),
65 cl::init(false));
66
Matt Arsenault03d85842016-06-27 20:32:13 +000067static cl::opt<bool> EnableR600IfConvert(
68 "r600-if-convert",
69 cl::desc("Use if conversion pass"),
70 cl::ReallyHidden,
71 cl::init(true));
72
Matt Arsenault908b9e22016-07-01 03:33:52 +000073// Option to disable vectorizer for tests.
74static cl::opt<bool> EnableLoadStoreVectorizer(
75 "amdgpu-load-store-vectorizer",
76 cl::desc("Enable load store vectorizer"),
Matt Arsenault0efdd062016-09-09 22:29:28 +000077 cl::init(true),
Matt Arsenault908b9e22016-07-01 03:33:52 +000078 cl::Hidden);
79
Alexander Timofeev18009562016-12-08 17:28:47 +000080// Option to to control global loads scalarization
81static cl::opt<bool> ScalarizeGlobal(
82 "amdgpu-scalarize-global-loads",
83 cl::desc("Enable global load scalarization"),
84 cl::init(false),
85 cl::Hidden);
86
Tom Stellard45bb48e2015-06-13 03:28:10 +000087extern "C" void LLVMInitializeAMDGPUTarget() {
88 // Register the target
Mehdi Aminif42454b2016-10-09 23:00:34 +000089 RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
90 RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());
Matt Arsenaultb87fc222015-10-01 22:10:03 +000091
92 PassRegistry *PR = PassRegistry::getPassRegistry();
Matt Arsenault8c0ef8b2015-10-12 17:43:59 +000093 initializeSILowerI1CopiesPass(*PR);
Matt Arsenault782c03b2015-11-03 22:30:13 +000094 initializeSIFixSGPRCopiesPass(*PR);
Stanislav Mekhanoshin22a56f22017-01-24 17:46:17 +000095 initializeSIFixVGPRCopiesPass(*PR);
Matt Arsenault8c0ef8b2015-10-12 17:43:59 +000096 initializeSIFoldOperandsPass(*PR);
Matt Arsenaultc3a01ec2016-06-09 23:18:47 +000097 initializeSIShrinkInstructionsPass(*PR);
Matt Arsenault187276f2015-10-07 00:42:53 +000098 initializeSIFixControlFlowLiveIntervalsPass(*PR);
99 initializeSILoadStoreOptimizerPass(*PR);
Matt Arsenault39319482015-11-06 18:01:57 +0000100 initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
Tom Stellarda6f24c62015-12-15 20:55:55 +0000101 initializeAMDGPUAnnotateUniformValuesPass(*PR);
Matt Arsenaulte0132462016-01-30 05:19:45 +0000102 initializeAMDGPUPromoteAllocaPass(*PR);
Matt Arsenault86de4862016-06-24 07:07:55 +0000103 initializeAMDGPUCodeGenPreparePass(*PR);
Stanislav Mekhanoshin50ea93a2016-12-08 19:46:04 +0000104 initializeAMDGPUUnifyMetadataPass(*PR);
Tom Stellard77a17772016-01-20 15:48:27 +0000105 initializeSIAnnotateControlFlowPass(*PR);
Tom Stellard6e1967e2016-02-05 17:42:38 +0000106 initializeSIInsertWaitsPass(*PR);
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000107 initializeSIWholeQuadModePass(*PR);
Matt Arsenault55d49cf2016-02-12 02:16:10 +0000108 initializeSILowerControlFlowPass(*PR);
Matt Arsenault78fc9da2016-08-22 19:33:16 +0000109 initializeSIInsertSkipsPass(*PR);
Matt Arsenaultd3e4c642016-06-02 00:04:22 +0000110 initializeSIDebuggerInsertNopsPass(*PR);
Matt Arsenaulte6740752016-09-29 01:44:16 +0000111 initializeSIOptimizeExecMaskingPass(*PR);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000112}
113
Tom Stellarde135ffd2015-09-25 21:41:28 +0000114static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
Eugene Zelenko6a9226d2016-12-12 22:23:53 +0000115 return llvm::make_unique<AMDGPUTargetObjectFile>();
Tom Stellarde135ffd2015-09-25 21:41:28 +0000116}
117
Tom Stellard45bb48e2015-06-13 03:28:10 +0000118static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
Eugene Zelenko6a9226d2016-12-12 22:23:53 +0000119 return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
Tom Stellard45bb48e2015-06-13 03:28:10 +0000120}
121
Matt Arsenault2ffe8fd2016-08-11 19:18:50 +0000122static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
123 return new SIScheduleDAGMI(C);
124}
125
Tom Stellard0d23ebe2016-08-29 19:42:52 +0000126static ScheduleDAGInstrs *
127createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
128 ScheduleDAGMILive *DAG =
Eugene Zelenko6a9226d2016-12-12 22:23:53 +0000129 new ScheduleDAGMILive(C,
130 llvm::make_unique<GCNMaxOccupancySchedStrategy>(C));
Matthias Braun115efcd2016-11-28 20:11:54 +0000131 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
132 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
Tom Stellard0d23ebe2016-08-29 19:42:52 +0000133 return DAG;
134}
135
Tom Stellard45bb48e2015-06-13 03:28:10 +0000136static MachineSchedRegistry
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000137R600SchedRegistry("r600", "Run R600's custom scheduler",
138 createR600MachineScheduler);
139
140static MachineSchedRegistry
141SISchedRegistry("si", "Run SI's custom scheduler",
142 createSIMachineScheduler);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000143
Tom Stellard0d23ebe2016-08-29 19:42:52 +0000144static MachineSchedRegistry
145GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
146 "Run GCN scheduler to maximize occupancy",
147 createGCNMaxOccupancyMachineScheduler);
148
Matt Arsenaultec30eb52016-05-31 16:57:45 +0000149static StringRef computeDataLayout(const Triple &TT) {
150 if (TT.getArch() == Triple::r600) {
151 // 32-bit pointers.
152 return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
153 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
Tom Stellard45bb48e2015-06-13 03:28:10 +0000154 }
155
Matt Arsenaultec30eb52016-05-31 16:57:45 +0000156 // 32-bit private, local, and region pointers. 64-bit global, constant and
157 // flat.
158 return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
159 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
160 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
Tom Stellard45bb48e2015-06-13 03:28:10 +0000161}
162
Matt Arsenaultb22828f2016-01-27 02:17:49 +0000163LLVM_READNONE
164static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
165 if (!GPU.empty())
166 return GPU;
167
168 // HSA only supports CI+, so change the default GPU to a CI for HSA.
169 if (TT.getArch() == Triple::amdgcn)
170 return (TT.getOS() == Triple::AMDHSA) ? "kaveri" : "tahiti";
171
Matt Arsenault8e001942016-06-02 18:37:16 +0000172 return "r600";
Matt Arsenaultb22828f2016-01-27 02:17:49 +0000173}
174
Rafael Espindola8c34dd82016-05-18 22:04:49 +0000175static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
Tom Stellard418beb72016-07-13 14:23:33 +0000176 // The AMDGPU toolchain only supports generating shared objects, so we
177 // must always use PIC.
178 return Reloc::PIC_;
Rafael Espindola8c34dd82016-05-18 22:04:49 +0000179}
180
Tom Stellard45bb48e2015-06-13 03:28:10 +0000181AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
182 StringRef CPU, StringRef FS,
Rafael Espindola8c34dd82016-05-18 22:04:49 +0000183 TargetOptions Options,
184 Optional<Reloc::Model> RM,
Tom Stellard45bb48e2015-06-13 03:28:10 +0000185 CodeModel::Model CM,
186 CodeGenOpt::Level OptLevel)
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000187 : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
188 FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
Eugene Zelenko6a9226d2016-12-12 22:23:53 +0000189 TLOF(createTLOF(getTargetTriple())) {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000190 initAsmInfo();
191}
192
Eugene Zelenko6a9226d2016-12-12 22:23:53 +0000193AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000194
Matt Arsenault59c0ffa2016-06-27 20:48:03 +0000195StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
196 Attribute GPUAttr = F.getFnAttribute("target-cpu");
197 return GPUAttr.hasAttribute(Attribute::None) ?
198 getTargetCPU() : GPUAttr.getValueAsString();
199}
200
201StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
202 Attribute FSAttr = F.getFnAttribute("target-features");
203
204 return FSAttr.hasAttribute(Attribute::None) ?
205 getTargetFeatureString() :
206 FSAttr.getValueAsString();
207}
208
Stanislav Mekhanoshin81598112017-01-26 16:49:08 +0000209void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
210 Builder.addExtension(
211 PassManagerBuilder::EP_EarlyAsPossible,
212 [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
213 PM.add(createAMDGPUUnifyMetadataPass());
214 });
Stanislav Mekhanoshin50ea93a2016-12-08 19:46:04 +0000215}
216
Tom Stellard45bb48e2015-06-13 03:28:10 +0000217//===----------------------------------------------------------------------===//
218// R600 Target Machine (R600 -> Cayman)
219//===----------------------------------------------------------------------===//
220
221R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
Tom Stellard5dde1d22016-02-05 18:29:17 +0000222 StringRef CPU, StringRef FS,
Rafael Espindola8c34dd82016-05-18 22:04:49 +0000223 TargetOptions Options,
224 Optional<Reloc::Model> RM,
Tom Stellard45bb48e2015-06-13 03:28:10 +0000225 CodeModel::Model CM, CodeGenOpt::Level OL)
Matt Arsenaultad55ee52016-12-06 01:02:51 +0000226 : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
227 setRequiresStructuredCFG(true);
228}
Matt Arsenault59c0ffa2016-06-27 20:48:03 +0000229
230const R600Subtarget *R600TargetMachine::getSubtargetImpl(
231 const Function &F) const {
232 StringRef GPU = getGPUName(F);
233 StringRef FS = getFeatureString(F);
234
235 SmallString<128> SubtargetKey(GPU);
236 SubtargetKey.append(FS);
237
238 auto &I = SubtargetMap[SubtargetKey];
239 if (!I) {
240 // This needs to be done before we create a new subtarget since any
241 // creation will depend on the TM and the code generation flags on the
242 // function that reside in TargetOptions.
243 resetTargetOptions(F);
244 I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
245 }
246
247 return I.get();
248}
Tom Stellard45bb48e2015-06-13 03:28:10 +0000249
250//===----------------------------------------------------------------------===//
251// GCN Target Machine (SI+)
252//===----------------------------------------------------------------------===//
253
#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {

/// GlobalISel accessor used when GlobalISel is built in: it owns the call
/// lowering object and hands out a non-owning pointer to it.
struct SIGISelActualAccessor : public GISelAccessor {
  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;

  const AMDGPUCallLowering *getCallLowering() const override {
    const AMDGPUCallLowering *Lowering = CallLoweringInfo.get();
    return Lowering;
  }
};

} // end anonymous namespace
#endif
266
Tom Stellard45bb48e2015-06-13 03:28:10 +0000267GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
Tom Stellard5dde1d22016-02-05 18:29:17 +0000268 StringRef CPU, StringRef FS,
Rafael Espindola8c34dd82016-05-18 22:04:49 +0000269 TargetOptions Options,
270 Optional<Reloc::Model> RM,
Tom Stellard45bb48e2015-06-13 03:28:10 +0000271 CodeModel::Model CM, CodeGenOpt::Level OL)
Matt Arsenault59c0ffa2016-06-27 20:48:03 +0000272 : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
273
274const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
275 StringRef GPU = getGPUName(F);
276 StringRef FS = getFeatureString(F);
277
278 SmallString<128> SubtargetKey(GPU);
279 SubtargetKey.append(FS);
280
281 auto &I = SubtargetMap[SubtargetKey];
282 if (!I) {
283 // This needs to be done before we create a new subtarget since any
284 // creation will depend on the TM and the code generation flags on the
285 // function that reside in TargetOptions.
286 resetTargetOptions(F);
287 I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);
288
289#ifndef LLVM_BUILD_GLOBAL_ISEL
290 GISelAccessor *GISel = new GISelAccessor();
291#else
292 SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
Matt Arsenaulteb9025d2016-06-28 17:42:09 +0000293 GISel->CallLoweringInfo.reset(
294 new AMDGPUCallLowering(*I->getTargetLowering()));
Matt Arsenault59c0ffa2016-06-27 20:48:03 +0000295#endif
296
297 I->setGISelAccessor(*GISel);
298 }
299
Alexander Timofeev18009562016-12-08 17:28:47 +0000300 I->setScalarizeGlobalBehavior(ScalarizeGlobal);
301
Matt Arsenault59c0ffa2016-06-27 20:48:03 +0000302 return I.get();
303}
Tom Stellard45bb48e2015-06-13 03:28:10 +0000304
305//===----------------------------------------------------------------------===//
306// AMDGPU Pass Setup
307//===----------------------------------------------------------------------===//
308
309namespace {
Tom Stellardcc7067a62016-03-03 03:53:29 +0000310
Tom Stellard45bb48e2015-06-13 03:28:10 +0000311class AMDGPUPassConfig : public TargetPassConfig {
312public:
313 AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
Matt Arsenault0a109002015-09-25 17:41:20 +0000314 : TargetPassConfig(TM, PM) {
Matt Arsenault0a109002015-09-25 17:41:20 +0000315 // Exceptions and StackMaps are not supported, so these passes will never do
316 // anything.
317 disablePass(&StackMapLivenessID);
318 disablePass(&FuncletLayoutID);
319 }
Tom Stellard45bb48e2015-06-13 03:28:10 +0000320
321 AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
322 return getTM<AMDGPUTargetMachine>();
323 }
324
Matthias Braun115efcd2016-11-28 20:11:54 +0000325 ScheduleDAGInstrs *
326 createMachineScheduler(MachineSchedContext *C) const override {
327 ScheduleDAGMILive *DAG = createGenericSchedLive(C);
328 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
329 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
330 return DAG;
331 }
332
Matt Arsenaultf42c6922016-06-15 00:11:01 +0000333 void addEarlyCSEOrGVNPass();
334 void addStraightLineScalarOptimizationPasses();
Tom Stellard45bb48e2015-06-13 03:28:10 +0000335 void addIRPasses() override;
Matt Arsenault908b9e22016-07-01 03:33:52 +0000336 void addCodeGenPrepare() override;
Matt Arsenault0a109002015-09-25 17:41:20 +0000337 bool addPreISel() override;
338 bool addInstSelector() override;
339 bool addGCPasses() override;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000340};
341
Matt Arsenault6b6a2c32016-03-11 08:00:27 +0000342class R600PassConfig final : public AMDGPUPassConfig {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000343public:
344 R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
Eugene Zelenko6a9226d2016-12-12 22:23:53 +0000345 : AMDGPUPassConfig(TM, PM) {}
Tom Stellard45bb48e2015-06-13 03:28:10 +0000346
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000347 ScheduleDAGInstrs *createMachineScheduler(
348 MachineSchedContext *C) const override {
349 return createR600MachineScheduler(C);
350 }
351
Tom Stellard45bb48e2015-06-13 03:28:10 +0000352 bool addPreISel() override;
353 void addPreRegAlloc() override;
354 void addPreSched2() override;
355 void addPreEmitPass() override;
356};
357
Matt Arsenault6b6a2c32016-03-11 08:00:27 +0000358class GCNPassConfig final : public AMDGPUPassConfig {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000359public:
360 GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
Eugene Zelenko6a9226d2016-12-12 22:23:53 +0000361 : AMDGPUPassConfig(TM, PM) {}
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000362
363 GCNTargetMachine &getGCNTargetMachine() const {
364 return getTM<GCNTargetMachine>();
365 }
366
367 ScheduleDAGInstrs *
Matt Arsenault03d85842016-06-27 20:32:13 +0000368 createMachineScheduler(MachineSchedContext *C) const override;
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000369
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +0000370 void addIRPasses() override;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000371 bool addPreISel() override;
Matt Arsenault3d1c1de2016-04-14 21:58:24 +0000372 void addMachineSSAOptimization() override;
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +0000373 bool addILPOpts() override;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000374 bool addInstSelector() override;
Tom Stellard000c5af2016-04-14 19:09:28 +0000375#ifdef LLVM_BUILD_GLOBAL_ISEL
376 bool addIRTranslator() override;
Tim Northover33b07d62016-07-22 20:03:43 +0000377 bool addLegalizeMachineIR() override;
Tom Stellard000c5af2016-04-14 19:09:28 +0000378 bool addRegBankSelect() override;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +0000379 bool addGlobalInstructionSelect() override;
Tom Stellard000c5af2016-04-14 19:09:28 +0000380#endif
Matt Arsenaultb87fc222015-10-01 22:10:03 +0000381 void addFastRegAlloc(FunctionPass *RegAllocPass) override;
382 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000383 void addPreRegAlloc() override;
Matt Arsenaulte6740752016-09-29 01:44:16 +0000384 void addPostRegAlloc() override;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000385 void addPreSched2() override;
386 void addPreEmitPass() override;
387};
388
Eugene Zelenko6a9226d2016-12-12 22:23:53 +0000389} // end anonymous namespace
Tom Stellard45bb48e2015-06-13 03:28:10 +0000390
391TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
Eric Christophera4e5d3c2015-09-16 23:38:13 +0000392 return TargetIRAnalysis([this](const Function &F) {
Matt Arsenault59c0ffa2016-06-27 20:48:03 +0000393 return TargetTransformInfo(AMDGPUTTIImpl(this, F));
Mehdi Amini5010ebf2015-07-09 02:08:42 +0000394 });
Tom Stellard45bb48e2015-06-13 03:28:10 +0000395}
396
Matt Arsenaultf42c6922016-06-15 00:11:01 +0000397void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
398 if (getOptLevel() == CodeGenOpt::Aggressive)
399 addPass(createGVNPass());
400 else
401 addPass(createEarlyCSEPass());
402}
403
404void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
405 addPass(createSeparateConstOffsetFromGEPPass());
406 addPass(createSpeculativeExecutionPass());
407 // ReassociateGEPs exposes more opportunites for SLSR. See
408 // the example in reassociate-geps-and-slsr.ll.
409 addPass(createStraightLineStrengthReducePass());
410 // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
411 // EarlyCSE can reuse.
412 addEarlyCSEOrGVNPass();
413 // Run NaryReassociate after EarlyCSE/GVN to be more effective.
414 addPass(createNaryReassociatePass());
415 // NaryReassociate on GEPs creates redundant common expressions, so run
416 // EarlyCSE after it.
417 addPass(createEarlyCSEPass());
418}
419
Tom Stellard45bb48e2015-06-13 03:28:10 +0000420void AMDGPUPassConfig::addIRPasses() {
Matt Arsenaultbde80342016-05-18 15:41:07 +0000421 // There is no reason to run these.
422 disablePass(&StackMapLivenessID);
423 disablePass(&FuncletLayoutID);
424 disablePass(&PatchableFunctionID);
425
Tom Stellard45bb48e2015-06-13 03:28:10 +0000426 // Function calls are not supported, so make sure we inline everything.
427 addPass(createAMDGPUAlwaysInlinePass());
Chandler Carruth67fc52f2016-08-17 02:56:20 +0000428 addPass(createAlwaysInlinerLegacyPass());
Tom Stellard45bb48e2015-06-13 03:28:10 +0000429 // We need to add the barrier noop pass, otherwise adding the function
430 // inlining pass will cause all of the PassConfigs passes to be run
431 // one function at a time, which means if we have a nodule with two
432 // functions, then we will generate code for the first function
433 // without ever running any passes on the second.
434 addPass(createBarrierNoopPass());
Matt Arsenault39319482015-11-06 18:01:57 +0000435
Tom Stellardfd253952015-08-07 23:19:30 +0000436 // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
437 addPass(createAMDGPUOpenCLImageTypeLoweringPass());
Matt Arsenault39319482015-11-06 18:01:57 +0000438
Matt Arsenaulte0132462016-01-30 05:19:45 +0000439 const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
Matt Arsenault03d85842016-06-27 20:32:13 +0000440 if (TM.getOptLevel() > CodeGenOpt::None) {
Matt Arsenaulte0132462016-01-30 05:19:45 +0000441 addPass(createAMDGPUPromoteAlloca(&TM));
Matt Arsenault03d85842016-06-27 20:32:13 +0000442
443 if (EnableSROA)
444 addPass(createSROAPass());
Matt Arsenaultf42c6922016-06-15 00:11:01 +0000445
Konstantin Zhuravlyov4658e5f2016-09-30 16:39:24 +0000446 addStraightLineScalarOptimizationPasses();
447 }
Matt Arsenaultf42c6922016-06-15 00:11:01 +0000448
449 TargetPassConfig::addIRPasses();
450
451 // EarlyCSE is not always strong enough to clean up what LSR produces. For
452 // example, GVN can combine
453 //
454 // %0 = add %a, %b
455 // %1 = add %b, %a
456 //
457 // and
458 //
459 // %0 = shl nsw %a, 2
460 // %1 = shl %a, 2
461 //
462 // but EarlyCSE can do neither of them.
463 if (getOptLevel() != CodeGenOpt::None)
464 addEarlyCSEOrGVNPass();
Tom Stellard45bb48e2015-06-13 03:28:10 +0000465}
466
Matt Arsenault908b9e22016-07-01 03:33:52 +0000467void AMDGPUPassConfig::addCodeGenPrepare() {
468 TargetPassConfig::addCodeGenPrepare();
469
470 if (EnableLoadStoreVectorizer)
471 addPass(createLoadStoreVectorizerPass());
472}
473
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000474bool AMDGPUPassConfig::addPreISel() {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000475 addPass(createFlattenCFGPass());
Tom Stellard45bb48e2015-06-13 03:28:10 +0000476 return false;
477}
478
479bool AMDGPUPassConfig::addInstSelector() {
Konstantin Zhuravlyov60a83732016-10-03 18:47:26 +0000480 addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));
Tom Stellard45bb48e2015-06-13 03:28:10 +0000481 return false;
482}
483
Matt Arsenault0a109002015-09-25 17:41:20 +0000484bool AMDGPUPassConfig::addGCPasses() {
485 // Do nothing. GC is not supported.
486 return false;
487}
488
Tom Stellard45bb48e2015-06-13 03:28:10 +0000489//===----------------------------------------------------------------------===//
490// R600 Pass Setup
491//===----------------------------------------------------------------------===//
492
493bool R600PassConfig::addPreISel() {
494 AMDGPUPassConfig::addPreISel();
Matt Arsenaultc5816112016-06-24 06:30:22 +0000495
496 if (EnableR600StructurizeCFG)
Tom Stellardbc4497b2016-02-12 23:45:29 +0000497 addPass(createStructurizeCFGPass());
Tom Stellard45bb48e2015-06-13 03:28:10 +0000498 return false;
499}
500
501void R600PassConfig::addPreRegAlloc() {
502 addPass(createR600VectorRegMerger(*TM));
503}
504
505void R600PassConfig::addPreSched2() {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000506 addPass(createR600EmitClauseMarkers(), false);
Matt Arsenault03d85842016-06-27 20:32:13 +0000507 if (EnableR600IfConvert)
Tom Stellard45bb48e2015-06-13 03:28:10 +0000508 addPass(&IfConverterID, false);
509 addPass(createR600ClauseMergePass(*TM), false);
510}
511
512void R600PassConfig::addPreEmitPass() {
513 addPass(createAMDGPUCFGStructurizerPass(), false);
514 addPass(createR600ExpandSpecialInstrsPass(*TM), false);
515 addPass(&FinalizeMachineBundlesID, false);
516 addPass(createR600Packetizer(*TM), false);
517 addPass(createR600ControlFlowFinalizer(*TM), false);
518}
519
520TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
521 return new R600PassConfig(this, PM);
522}
523
524//===----------------------------------------------------------------------===//
525// GCN Pass Setup
526//===----------------------------------------------------------------------===//
527
Matt Arsenault03d85842016-06-27 20:32:13 +0000528ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
529 MachineSchedContext *C) const {
530 const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
531 if (ST.enableSIScheduler())
532 return createSIMachineScheduler(C);
Tom Stellard0d23ebe2016-08-29 19:42:52 +0000533 return createGCNMaxOccupancyMachineScheduler(C);
Matt Arsenault03d85842016-06-27 20:32:13 +0000534}
535
Tom Stellard45bb48e2015-06-13 03:28:10 +0000536bool GCNPassConfig::addPreISel() {
537 AMDGPUPassConfig::addPreISel();
Matt Arsenault39319482015-11-06 18:01:57 +0000538
539 // FIXME: We need to run a pass to propagate the attributes when calls are
540 // supported.
541 addPass(&AMDGPUAnnotateKernelFeaturesID);
Tom Stellardbc4497b2016-02-12 23:45:29 +0000542 addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
Tom Stellard45bb48e2015-06-13 03:28:10 +0000543 addPass(createSinkingPass());
544 addPass(createSITypeRewriter());
Tom Stellarda6f24c62015-12-15 20:55:55 +0000545 addPass(createAMDGPUAnnotateUniformValues());
Tom Stellardbc4497b2016-02-12 23:45:29 +0000546 addPass(createSIAnnotateControlFlowPass());
Tom Stellarda6f24c62015-12-15 20:55:55 +0000547
Tom Stellard45bb48e2015-06-13 03:28:10 +0000548 return false;
549}
550
Matt Arsenault3d1c1de2016-04-14 21:58:24 +0000551void GCNPassConfig::addMachineSSAOptimization() {
552 TargetPassConfig::addMachineSSAOptimization();
553
554 // We want to fold operands after PeepholeOptimizer has run (or as part of
555 // it), because it will eliminate extra copies making it easier to fold the
556 // real source operand. We want to eliminate dead instructions after, so that
557 // we see fewer uses of the copies. We then need to clean up the dead
558 // instructions leftover after the operands are folded as well.
559 //
560 // XXX - Can we get away without running DeadMachineInstructionElim again?
561 addPass(&SIFoldOperandsID);
562 addPass(&DeadMachineInstructionElimID);
Tom Stellardc2ff0eb2016-08-29 19:15:22 +0000563 addPass(&SILoadStoreOptimizerID);
Matt Arsenault3d1c1de2016-04-14 21:58:24 +0000564}
565
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +0000566bool GCNPassConfig::addILPOpts() {
567 if (EnableEarlyIfConversion)
568 addPass(&EarlyIfConverterID);
569
570 TargetPassConfig::addILPOpts();
571 return false;
572}
573
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +0000574void GCNPassConfig::addIRPasses() {
575 // TODO: May want to move later or split into an early and late one.
576 addPass(createAMDGPUCodeGenPreparePass(&getGCNTargetMachine()));
577
578 AMDGPUPassConfig::addIRPasses();
579}
580
Tom Stellard45bb48e2015-06-13 03:28:10 +0000581bool GCNPassConfig::addInstSelector() {
582 AMDGPUPassConfig::addInstSelector();
583 addPass(createSILowerI1CopiesPass());
Matt Arsenault782c03b2015-11-03 22:30:13 +0000584 addPass(&SIFixSGPRCopiesID);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000585 return false;
586}
587
#ifdef LLVM_BUILD_GLOBAL_ISEL
// GlobalISel pipeline hooks. Only the IR translator adds a pass; the other
// three hooks add nothing. All four return false.

bool GCNPassConfig::addIRTranslator() {
  IRTranslator *Translator = new IRTranslator();
  addPass(Translator);
  return false;
}

bool GCNPassConfig::addLegalizeMachineIR() {
  // No pass is added.
  return false;
}

bool GCNPassConfig::addRegBankSelect() {
  // No pass is added.
  return false;
}

bool GCNPassConfig::addGlobalInstructionSelect() {
  // No pass is added.
  return false;
}
#endif
606
Tom Stellard45bb48e2015-06-13 03:28:10 +0000607void GCNPassConfig::addPreRegAlloc() {
Matt Arsenault4a07bf62016-06-22 20:26:24 +0000608 addPass(createSIShrinkInstructionsPass());
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000609 addPass(createSIWholeQuadModePass());
Matt Arsenaultb87fc222015-10-01 22:10:03 +0000610}
611
612void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
Matt Arsenault78fc9da2016-08-22 19:33:16 +0000613 // FIXME: We have to disable the verifier here because of PHIElimination +
614 // TwoAddressInstructions disabling it.
Matt Arsenaulte6740752016-09-29 01:44:16 +0000615
616 // This must be run immediately after phi elimination and before
617 // TwoAddressInstructions, otherwise the processing of the tied operand of
618 // SI_ELSE will introduce a copy of the tied operand source after the else.
619 insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
Matt Arsenault78fc9da2016-08-22 19:33:16 +0000620
Matt Arsenaultb87fc222015-10-01 22:10:03 +0000621 TargetPassConfig::addFastRegAlloc(RegAllocPass);
622}
623
624void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
Matt Arsenault78fc9da2016-08-22 19:33:16 +0000625 // This needs to be run directly before register allocation because earlier
626 // passes might recompute live intervals.
627 insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
628
Matt Arsenaulte6740752016-09-29 01:44:16 +0000629 // This must be run immediately after phi elimination and before
630 // TwoAddressInstructions, otherwise the processing of the tied operand of
631 // SI_ELSE will introduce a copy of the tied operand source after the else.
632 insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
Matt Arsenault78fc9da2016-08-22 19:33:16 +0000633
Matt Arsenaultb87fc222015-10-01 22:10:03 +0000634 TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000635}
636
Matt Arsenaulte6740752016-09-29 01:44:16 +0000637void GCNPassConfig::addPostRegAlloc() {
Stanislav Mekhanoshin22a56f22017-01-24 17:46:17 +0000638 addPass(&SIFixVGPRCopiesID);
Matt Arsenaulte6740752016-09-29 01:44:16 +0000639 addPass(&SIOptimizeExecMaskingID);
640 TargetPassConfig::addPostRegAlloc();
641}
642
Tom Stellard45bb48e2015-06-13 03:28:10 +0000643void GCNPassConfig::addPreSched2() {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000644}
645
646void GCNPassConfig::addPreEmitPass() {
Tom Stellardcb6ba622016-04-30 00:23:06 +0000647 // The hazard recognizer that runs as part of the post-ra scheduler does not
Matt Arsenault254a6452016-06-28 16:59:53 +0000648 // guarantee to be able handle all hazards correctly. This is because if there
649 // are multiple scheduling regions in a basic block, the regions are scheduled
650 // bottom up, so when we begin to schedule a region we don't know what
651 // instructions were emitted directly before it.
Tom Stellardcb6ba622016-04-30 00:23:06 +0000652 //
Matt Arsenault254a6452016-06-28 16:59:53 +0000653 // Here we add a stand-alone hazard recognizer pass which can handle all
654 // cases.
Tom Stellardcb6ba622016-04-30 00:23:06 +0000655 addPass(&PostRAHazardRecognizerID);
656
Matt Arsenaulte2bd9a32016-06-09 23:19:14 +0000657 addPass(createSIInsertWaitsPass());
Matt Arsenaultcf2744f2016-04-29 20:23:42 +0000658 addPass(createSIShrinkInstructionsPass());
Matt Arsenault78fc9da2016-08-22 19:33:16 +0000659 addPass(&SIInsertSkipsPassID);
Matt Arsenault9babdf42016-06-22 20:15:28 +0000660 addPass(createSIDebuggerInsertNopsPass());
Matt Arsenault6bc43d82016-10-06 16:20:41 +0000661 addPass(&BranchRelaxationPassID);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000662}
663
664TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
665 return new GCNPassConfig(this, PM);
666}