blob: 50d12393482d38e1aede20530e7e496b85f0f19f [file] [log] [blame]
Tom Stellard45bb48e2015-06-13 03:28:10 +00001//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief The AMDGPU target machine contains all of the hardware specific
12/// information needed to emit code for R600 and SI GPUs.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPUTargetMachine.h"
Tom Stellardc93fc112015-12-10 02:13:01 +000017#include "AMDGPUTargetObjectFile.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000018#include "AMDGPU.h"
19#include "AMDGPUTargetTransformInfo.h"
20#include "R600ISelLowering.h"
21#include "R600InstrInfo.h"
22#include "R600MachineScheduler.h"
23#include "SIISelLowering.h"
24#include "SIInstrInfo.h"
25#include "llvm/Analysis/Passes.h"
Matt Arsenault55dff272016-06-28 00:11:26 +000026#include "llvm/CodeGen/GlobalISel/CallLowering.h"
Tom Stellard000c5af2016-04-14 19:09:28 +000027#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000028#include "llvm/CodeGen/MachineFunctionAnalysis.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000029#include "llvm/CodeGen/MachineModuleInfo.h"
30#include "llvm/CodeGen/Passes.h"
Matthias Braun31d19d42016-05-10 03:21:59 +000031#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
32#include "llvm/CodeGen/TargetPassConfig.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000033#include "llvm/IR/Verifier.h"
34#include "llvm/MC/MCAsmInfo.h"
35#include "llvm/IR/LegacyPassManager.h"
36#include "llvm/Support/TargetRegistry.h"
37#include "llvm/Support/raw_os_ostream.h"
38#include "llvm/Transforms/IPO.h"
39#include "llvm/Transforms/Scalar.h"
Matt Arsenaultf42c6922016-06-15 00:11:01 +000040#include "llvm/Transforms/Scalar/GVN.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000041
42using namespace llvm;
43
// Use the generic StructurizeCFG IR pass on R600 (on by default); can be
// disabled for debugging with -r600-ir-structurize=0.
static cl::opt<bool> EnableR600StructurizeCFG(
  "r600-ir-structurize",
  cl::desc("Use StructurizeCFG IR pass"),
  cl::init(true));

// Run scalar replacement of aggregates after AMDGPUPromoteAlloca (on by
// default). Hidden escape hatch for debugging the promote-alloca pipeline.
static cl::opt<bool> EnableSROA(
  "amdgpu-sroa",
  cl::desc("Run SROA after promote alloca pass"),
  cl::ReallyHidden,
  cl::init(true));

// Run the generic IfConverter in the R600 pre-sched2 pipeline (on by default).
static cl::opt<bool> EnableR600IfConvert(
  "r600-if-convert",
  cl::desc("Use if conversion pass"),
  cl::ReallyHidden,
  cl::init(true));
60
Tom Stellard45bb48e2015-06-13 03:28:10 +000061extern "C" void LLVMInitializeAMDGPUTarget() {
62 // Register the target
63 RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
64 RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);
Matt Arsenaultb87fc222015-10-01 22:10:03 +000065
66 PassRegistry *PR = PassRegistry::getPassRegistry();
Matt Arsenault8c0ef8b2015-10-12 17:43:59 +000067 initializeSILowerI1CopiesPass(*PR);
Matt Arsenault782c03b2015-11-03 22:30:13 +000068 initializeSIFixSGPRCopiesPass(*PR);
Matt Arsenault8c0ef8b2015-10-12 17:43:59 +000069 initializeSIFoldOperandsPass(*PR);
Matt Arsenaultc3a01ec2016-06-09 23:18:47 +000070 initializeSIShrinkInstructionsPass(*PR);
Matt Arsenault187276f2015-10-07 00:42:53 +000071 initializeSIFixControlFlowLiveIntervalsPass(*PR);
72 initializeSILoadStoreOptimizerPass(*PR);
Matt Arsenault39319482015-11-06 18:01:57 +000073 initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
Tom Stellarda6f24c62015-12-15 20:55:55 +000074 initializeAMDGPUAnnotateUniformValuesPass(*PR);
Matt Arsenaulte0132462016-01-30 05:19:45 +000075 initializeAMDGPUPromoteAllocaPass(*PR);
Matt Arsenault86de4862016-06-24 07:07:55 +000076 initializeAMDGPUCodeGenPreparePass(*PR);
Tom Stellard77a17772016-01-20 15:48:27 +000077 initializeSIAnnotateControlFlowPass(*PR);
Konstantin Zhuravlyova7919322016-05-10 18:33:41 +000078 initializeSIDebuggerInsertNopsPass(*PR);
Tom Stellard6e1967e2016-02-05 17:42:38 +000079 initializeSIInsertWaitsPass(*PR);
Nicolai Haehnle213e87f2016-03-21 20:28:33 +000080 initializeSIWholeQuadModePass(*PR);
Matt Arsenault55d49cf2016-02-12 02:16:10 +000081 initializeSILowerControlFlowPass(*PR);
Matt Arsenaultd3e4c642016-06-02 00:04:22 +000082 initializeSIDebuggerInsertNopsPass(*PR);
Tom Stellard45bb48e2015-06-13 03:28:10 +000083}
84
// Create the lowering-object-file implementation for this backend. The triple
// parameter is currently unused: both R600 and GCN use AMDGPUTargetObjectFile.
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  return make_unique<AMDGPUTargetObjectFile>();
}
88
// Factory for the R600 custom machine scheduler, used both by the scheduler
// registry below and by R600PassConfig::createMachineScheduler.
static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>());
}
92
// Expose the custom schedulers on the command line
// (-misched=r600 / -misched=si).
static MachineSchedRegistry
R600SchedRegistry("r600", "Run R600's custom scheduler",
                  createR600MachineScheduler);

static MachineSchedRegistry
SISchedRegistry("si", "Run SI's custom scheduler",
                createSIMachineScheduler);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000100
/// \brief Build the data layout string for the given triple. R600 uses 32-bit
/// pointers in every address space; amdgcn mixes 32-bit and 64-bit pointers
/// per address space as described below.
static StringRef computeDataLayout(const Triple &TT) {
  if (TT.getArch() == Triple::r600) {
    // 32-bit pointers.
    return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
           "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
  }

  // 32-bit private, local, and region pointers. 64-bit global, constant and
  // flat.
  return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
         "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
         "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
}
114
/// \brief Return \p GPU unchanged if non-empty, otherwise a per-triple
/// default CPU name ("kaveri"/"tahiti" for amdgcn, "r600" otherwise).
LLVM_READNONE
static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
  if (!GPU.empty())
    return GPU;

  // HSA only supports CI+, so change the default GPU to a CI for HSA.
  if (TT.getArch() == Triple::amdgcn)
    return (TT.getOS() == Triple::AMDHSA) ? "kaveri" : "tahiti";

  return "r600";
}
126
Rafael Espindola8c34dd82016-05-18 22:04:49 +0000127static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
128 if (!RM.hasValue())
129 return Reloc::PIC_;
130 return *RM;
131}
132
/// Common target machine constructor shared by the R600 and GCN subclasses.
/// Fills in the data layout, default CPU, and relocation model, then marks
/// the target as requiring structured CFG (both R600 and SI instruction
/// selection depend on the structurizer passes added later).
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                                         StringRef CPU, StringRef FS,
                                         TargetOptions Options,
                                         Optional<Reloc::Model> RM,
                                         CodeModel::Model CM,
                                         CodeGenOpt::Level OptLevel)
  : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
                      FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
    TLOF(createTLOF(getTargetTriple())),
    IntrinsicInfo() {
  setRequiresStructuredCFG(true);
  initAsmInfo();
}
146
Tom Stellarde135ffd2015-09-25 21:41:28 +0000147AMDGPUTargetMachine::~AMDGPUTargetMachine() { }
Tom Stellard45bb48e2015-06-13 03:28:10 +0000148
/// Return the CPU name for \p F: the function's "target-cpu" attribute when
/// present, otherwise the TargetMachine-level default CPU.
StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
  Attribute GPUAttr = F.getFnAttribute("target-cpu");
  // hasAttribute(Attribute::None) is true when the attribute is absent.
  return GPUAttr.hasAttribute(Attribute::None) ?
    getTargetCPU() : GPUAttr.getValueAsString();
}
154
155StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
156 Attribute FSAttr = F.getFnAttribute("target-features");
157
158 return FSAttr.hasAttribute(Attribute::None) ?
159 getTargetFeatureString() :
160 FSAttr.getValueAsString();
161}
162
Tom Stellard45bb48e2015-06-13 03:28:10 +0000163//===----------------------------------------------------------------------===//
164// R600 Target Machine (R600 -> Cayman)
165//===----------------------------------------------------------------------===//
166
/// Target machine for the pre-GCN GPU family (R600 through Cayman); all setup
/// happens in the shared AMDGPUTargetMachine base.
R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
                                     StringRef CPU, StringRef FS,
                                     TargetOptions Options,
                                     Optional<Reloc::Model> RM,
                                     CodeModel::Model CM, CodeGenOpt::Level OL)
  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
173
/// Return (creating and caching on first use) the R600 subtarget for \p F.
/// Subtargets are keyed on the per-function GPU name plus feature string so
/// functions with different attributes get distinct subtargets.
const R600Subtarget *R600TargetMachine::getSubtargetImpl(
  const Function &F) const {
  StringRef GPU = getGPUName(F);
  StringRef FS = getFeatureString(F);

  SmallString<128> SubtargetKey(GPU);
  SubtargetKey.append(FS);

  auto &I = SubtargetMap[SubtargetKey];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
  }

  return I.get();
}
Tom Stellard45bb48e2015-06-13 03:28:10 +0000193
194//===----------------------------------------------------------------------===//
195// GCN Target Machine (SI+)
196//===----------------------------------------------------------------------===//
197
#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {
// GlobalISel accessor that actually owns the SI call-lowering implementation;
// only compiled in when GlobalISel support is built.
struct SIGISelActualAccessor : public GISelAccessor {
  std::unique_ptr<CallLowering> CallLoweringInfo;
  const CallLowering *getCallLowering() const override {
    return CallLoweringInfo.get();
  }
};
} // End anonymous namespace.
#endif
208
/// Target machine for the GCN GPU family (SI and newer); all setup happens in
/// the shared AMDGPUTargetMachine base.
GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
                                   StringRef CPU, StringRef FS,
                                   TargetOptions Options,
                                   Optional<Reloc::Model> RM,
                                   CodeModel::Model CM, CodeGenOpt::Level OL)
  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
215
/// Return (creating and caching on first use) the SI subtarget for \p F,
/// keyed on the per-function GPU name plus feature string. Newly created
/// subtargets also receive a GISelAccessor (a stub when GlobalISel is not
/// built).
const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
  StringRef GPU = getGPUName(F);
  StringRef FS = getFeatureString(F);

  SmallString<128> SubtargetKey(GPU);
  SubtargetKey.append(FS);

  auto &I = SubtargetMap[SubtargetKey];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);

#ifndef LLVM_BUILD_GLOBAL_ISEL
    GISelAccessor *GISel = new GISelAccessor();
#else
    SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
#endif

    // NOTE(review): GISel is heap-allocated here and handed over by
    // reference — presumably the subtarget takes ownership; confirm against
    // SISubtarget::setGISelAccessor to rule out a leak.
    I->setGISelAccessor(*GISel);
  }

  return I.get();
}
Tom Stellard45bb48e2015-06-13 03:28:10 +0000242
243//===----------------------------------------------------------------------===//
244// AMDGPU Pass Setup
245//===----------------------------------------------------------------------===//
246
247namespace {
Tom Stellardcc7067a62016-03-03 03:53:29 +0000248
/// Codegen pass configuration shared by the R600 and GCN backends.
class AMDGPUPassConfig : public TargetPassConfig {
public:
  AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {

    // Exceptions and StackMaps are not supported, so these passes will never do
    // anything.
    disablePass(&StackMapLivenessID);
    disablePass(&FuncletLayoutID);
  }

  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
    return getTM<AMDGPUTargetMachine>();
  }

  // Helpers used to build the shared IR optimization pipeline (see
  // addIRPasses below).
  void addEarlyCSEOrGVNPass();
  void addStraightLineScalarOptimizationPasses();
  void addIRPasses() override;
  bool addPreISel() override;
  bool addInstSelector() override;
  bool addGCPasses() override;
};
271
/// Pass configuration for the R600 family; always uses the custom R600
/// machine scheduler.
class R600PassConfig final : public AMDGPUPassConfig {
public:
  R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) { }

  ScheduleDAGInstrs *createMachineScheduler(
    MachineSchedContext *C) const override {
    return createR600MachineScheduler(C);
  }

  bool addPreISel() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};
287
/// Pass configuration for the GCN family (SI+). Adds SI-specific IR and
/// machine passes on top of the shared AMDGPU pipeline, plus the optional
/// GlobalISel hooks when that support is built.
class GCNPassConfig final : public AMDGPUPassConfig {
public:
  GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) { }

  GCNTargetMachine &getGCNTargetMachine() const {
    return getTM<GCNTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override;

  bool addPreISel() override;
  void addMachineSSAOptimization() override;
  bool addInstSelector() override;
#ifdef LLVM_BUILD_GLOBAL_ISEL
  bool addIRTranslator() override;
  bool addRegBankSelect() override;
#endif
  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};
313
314} // End of anonymous namespace
315
/// Return a TargetIRAnalysis whose result is the AMDGPU TTI implementation,
/// constructed per-function so function attributes are respected.
TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
  return TargetIRAnalysis([this](const Function &F) {
    return TargetTransformInfo(AMDGPUTTIImpl(this, F));
  });
}
321
Matt Arsenaultf42c6922016-06-15 00:11:01 +0000322void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
323 if (getOptLevel() == CodeGenOpt::Aggressive)
324 addPass(createGVNPass());
325 else
326 addPass(createEarlyCSEPass());
327}
328
// Straight-line scalar optimizations: GEP/addressing cleanups that expose
// redundancy, interleaved with CSE/GVN to collect it.
void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
  addPass(createSeparateConstOffsetFromGEPPass());
  addPass(createSpeculativeExecutionPass());
  // ReassociateGEPs exposes more opportunities for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
  // EarlyCSE can reuse.
  addEarlyCSEOrGVNPass();
  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(createNaryReassociatePass());
  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(createEarlyCSEPass());
}
344
/// Shared IR pipeline: force full inlining (calls are unsupported), lower
/// OpenCL image types, promote allocas at -O1+, then run the straight-line
/// scalar optimizations and a final CSE/GVN cleanup.
void AMDGPUPassConfig::addIRPasses() {
  // There is no reason to run these.
  disablePass(&StackMapLivenessID);
  disablePass(&FuncletLayoutID);
  disablePass(&PatchableFunctionID);

  // Function calls are not supported, so make sure we inline everything.
  addPass(createAMDGPUAlwaysInlinePass());
  addPass(createAlwaysInlinerPass());
  // We need to add the barrier noop pass, otherwise adding the function
  // inlining pass will cause all of the PassConfigs passes to be run
  // one function at a time, which means if we have a module with two
  // functions, then we will generate code for the first function
  // without ever running any passes on the second.
  addPass(createBarrierNoopPass());

  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
  addPass(createAMDGPUOpenCLImageTypeLoweringPass());

  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
  if (TM.getOptLevel() > CodeGenOpt::None) {
    addPass(createAMDGPUPromoteAlloca(&TM));

    if (EnableSROA)
      addPass(createSROAPass());
  }

  addStraightLineScalarOptimizationPasses();

  TargetPassConfig::addIRPasses();

  // EarlyCSE is not always strong enough to clean up what LSR produces. For
  // example, GVN can combine
  //
  // %0 = add %a, %b
  // %1 = add %b, %a
  //
  // and
  //
  // %0 = shl nsw %a, 2
  // %1 = shl %a, 2
  //
  // but EarlyCSE can do neither of them.
  if (getOptLevel() != CodeGenOpt::None)
    addEarlyCSEOrGVNPass();
}
391
// Shared pre-ISel step: flatten the CFG before the subclass structurizers run.
bool AMDGPUPassConfig::addPreISel() {
  addPass(createFlattenCFGPass());
  return false;
}
396
// Install the shared SelectionDAG instruction selector.
bool AMDGPUPassConfig::addInstSelector() {
  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
  return false;
}
401
bool AMDGPUPassConfig::addGCPasses() {
  // Do nothing. GC is not supported.
  return false;
}
406
Tom Stellard45bb48e2015-06-13 03:28:10 +0000407//===----------------------------------------------------------------------===//
408// R600 Pass Setup
409//===----------------------------------------------------------------------===//
410
// R600 pre-ISel: optionally structurize the CFG, then replace texture
// intrinsics with R600-specific forms.
bool R600PassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  if (EnableR600StructurizeCFG)
    addPass(createStructurizeCFGPass());
  addPass(createR600TextureIntrinsicsReplacer());
  return false;
}
419
// Merge vector registers before register allocation.
void R600PassConfig::addPreRegAlloc() {
  addPass(createR600VectorRegMerger(*TM));
}
423
// Post-RA, pre-sched2: emit clause markers, optionally if-convert, then merge
// adjacent clauses. The 'false' argument suppresses pass verification.
void R600PassConfig::addPreSched2() {
  addPass(createR600EmitClauseMarkers(), false);
  if (EnableR600IfConvert)
    addPass(&IfConverterID, false);
  addPass(createR600ClauseMergePass(*TM), false);
}
430
// Final R600 lowering before emission: structurize machine CFG, expand
// pseudo instructions, bundle/packetize, and finalize control flow.
void R600PassConfig::addPreEmitPass() {
  addPass(createAMDGPUCFGStructurizerPass(), false);
  addPass(createR600ExpandSpecialInstrsPass(*TM), false);
  addPass(&FinalizeMachineBundlesID, false);
  addPass(createR600Packetizer(*TM), false);
  addPass(createR600ControlFlowFinalizer(*TM), false);
}
438
// Hand the pass manager an R600-specific pass configuration.
TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new R600PassConfig(this, PM);
}
442
443//===----------------------------------------------------------------------===//
444// GCN Pass Setup
445//===----------------------------------------------------------------------===//
446
// Use the SI machine scheduler when the subtarget requests it; returning
// nullptr falls back to the default machine scheduler.
ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
  MachineSchedContext *C) const {
  const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
  if (ST.enableSIScheduler())
    return createSIMachineScheduler(C);
  return nullptr;
}
454
// GCN pre-ISel: annotate kernel features and uniform values, structurize the
// CFG (skipping uniform regions), and annotate control flow for SI lowering.
bool GCNPassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  // FIXME: We need to run a pass to propagate the attributes when calls are
  // supported.
  addPass(&AMDGPUAnnotateKernelFeaturesID);
  addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
  addPass(createSinkingPass());
  addPass(createSITypeRewriter());
  addPass(createAMDGPUAnnotateUniformValues());
  addPass(createSIAnnotateControlFlowPass());

  return false;
}
469
// Extend the generic machine-SSA optimizations with SI operand folding and a
// dead-instruction cleanup afterwards.
void GCNPassConfig::addMachineSSAOptimization() {
  TargetPassConfig::addMachineSSAOptimization();

  // We want to fold operands after PeepholeOptimizer has run (or as part of
  // it), because it will eliminate extra copies making it easier to fold the
  // real source operand. We want to eliminate dead instructions after, so that
  // we see fewer uses of the copies. We then need to clean up the dead
  // instructions leftover after the operands are folded as well.
  //
  // XXX - Can we get away without running DeadMachineInstructionElim again?
  addPass(&SIFoldOperandsID);
  addPass(&DeadMachineInstructionElimID);
}
483
// GCN instruction selection: run the shared selector, then lower i1 copies
// and fix illegal VGPR-to-SGPR copies the selector may have produced.
bool GCNPassConfig::addInstSelector() {
  AMDGPUPassConfig::addInstSelector();
  addPass(createSILowerI1CopiesPass());
  addPass(&SIFixSGPRCopiesID);
  return false;
}
490
#ifdef LLVM_BUILD_GLOBAL_ISEL
// GlobalISel: translate IR to generic MIR.
bool GCNPassConfig::addIRTranslator() {
  addPass(new IRTranslator());
  return false;
}

// Register-bank selection is not implemented for AMDGPU yet.
bool GCNPassConfig::addRegBankSelect() {
  return false;
}
#endif
501
Tom Stellard45bb48e2015-06-13 03:28:10 +0000502void GCNPassConfig::addPreRegAlloc() {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000503 // This needs to be run directly before register allocation because
504 // earlier passes might recompute live intervals.
505 // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
506 if (getOptLevel() > CodeGenOpt::None) {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000507 insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
508 }
509
Matt Arsenault03d85842016-06-27 20:32:13 +0000510 if (getOptLevel() > CodeGenOpt::None) {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000511 // Don't do this with no optimizations since it throws away debug info by
512 // merging nonadjacent loads.
513
514 // This should be run after scheduling, but before register allocation. It
515 // also need extra copies to the address operand to be eliminated.
Matt Arsenault03d85842016-06-27 20:32:13 +0000516
517 // FIXME: Move pre-RA and remove extra reg coalescer run.
Tom Stellard45bb48e2015-06-13 03:28:10 +0000518 insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
Matt Arsenault84db5d92015-07-14 17:57:36 +0000519 insertPass(&MachineSchedulerID, &RegisterCoalescerID);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000520 }
Matt Arsenault03d85842016-06-27 20:32:13 +0000521
Matt Arsenault4a07bf62016-06-22 20:26:24 +0000522 addPass(createSIShrinkInstructionsPass());
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000523 addPass(createSIWholeQuadModePass());
Matt Arsenaultb87fc222015-10-01 22:10:03 +0000524}
525
// No GCN-specific behavior yet; placeholder that defers to the default
// fast register allocation pipeline.
void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
  TargetPassConfig::addFastRegAlloc(RegAllocPass);
}
529
// No GCN-specific behavior yet; placeholder that defers to the default
// optimized register allocation pipeline.
void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}
533
// Intentionally empty: GCN adds nothing before the second scheduling pass.
void GCNPassConfig::addPreSched2() {
}
536
// Final GCN lowering before emission: stand-alone hazard handling, wait-state
// insertion, instruction shrinking, control-flow lowering, and debugger nops.
void GCNPassConfig::addPreEmitPass() {

  // The hazard recognizer that runs as part of the post-ra scheduler does not
  // guarantee to be able to handle all hazards correctly. This is because
  // if there are multiple scheduling regions in a basic block, the regions
  // are scheduled bottom up, so when we begin to schedule a region we don't
  // know what instructions were emitted directly before it.
  //
  // Here we add a stand-alone hazard recognizer pass which can handle all
  // cases.
  addPass(&PostRAHazardRecognizerID);

  addPass(createSIInsertWaitsPass());
  addPass(createSIShrinkInstructionsPass());
  addPass(createSILowerControlFlowPass());
  addPass(createSIDebuggerInsertNopsPass());
}
554
// Hand the pass manager a GCN-specific pass configuration.
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new GCNPassConfig(this, PM);
}