blob: f782ea31a506d1f45fefc056d8e985c218fe6892 [file] [log] [blame]
Tom Stellard45bb48e2015-06-13 03:28:10 +00001//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief The AMDGPU target machine contains all of the hardware specific
12/// information needed to emit code for R600 and SI GPUs.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPUTargetMachine.h"
17#include "AMDGPU.h"
Matt Arsenaulteb9025d2016-06-28 17:42:09 +000018#include "AMDGPUCallLowering.h"
19#include "AMDGPUTargetObjectFile.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000020#include "AMDGPUTargetTransformInfo.h"
21#include "R600ISelLowering.h"
22#include "R600InstrInfo.h"
23#include "R600MachineScheduler.h"
24#include "SIISelLowering.h"
25#include "SIInstrInfo.h"
Matt Arsenault2ffe8fd2016-08-11 19:18:50 +000026#include "SIMachineScheduler.h"
Tom Stellard000c5af2016-04-14 19:09:28 +000027#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000028#include "llvm/CodeGen/Passes.h"
Matthias Braun31d19d42016-05-10 03:21:59 +000029#include "llvm/CodeGen/TargetPassConfig.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000030#include "llvm/Support/TargetRegistry.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000031#include "llvm/Transforms/IPO.h"
Chandler Carruth67fc52f2016-08-17 02:56:20 +000032#include "llvm/Transforms/IPO/AlwaysInliner.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000033#include "llvm/Transforms/Scalar.h"
Matt Arsenaultf42c6922016-06-15 00:11:01 +000034#include "llvm/Transforms/Scalar/GVN.h"
Matt Arsenault908b9e22016-07-01 03:33:52 +000035#include "llvm/Transforms/Vectorize.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000036
37using namespace llvm;
38
// Run the StructurizeCFG IR pass before instruction selection on R600.
// On by default; exposed so tests can disable it.
static cl::opt<bool> EnableR600StructurizeCFG(
  "r600-ir-structurize",
  cl::desc("Use StructurizeCFG IR pass"),
  cl::init(true));

// Run SROA after the promote-alloca pass (on by default). Hidden knob for
// debugging/testing the alloca-promotion pipeline.
static cl::opt<bool> EnableSROA(
  "amdgpu-sroa",
  cl::desc("Run SROA after promote alloca pass"),
  cl::ReallyHidden,
  cl::init(true));

// Run the machine if-conversion pass on R600 (on by default).
static cl::opt<bool> EnableR600IfConvert(
  "r600-if-convert",
  cl::desc("Use if conversion pass"),
  cl::ReallyHidden,
  cl::init(true));

// Option to disable vectorizer for tests.
static cl::opt<bool> EnableLoadStoreVectorizer(
  "amdgpu-load-store-vectorizer",
  cl::desc("Enable load store vectorizer"),
  cl::init(false),
  cl::Hidden);
62
Tom Stellard45bb48e2015-06-13 03:28:10 +000063extern "C" void LLVMInitializeAMDGPUTarget() {
64 // Register the target
65 RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
66 RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);
Matt Arsenaultb87fc222015-10-01 22:10:03 +000067
68 PassRegistry *PR = PassRegistry::getPassRegistry();
Matt Arsenault8c0ef8b2015-10-12 17:43:59 +000069 initializeSILowerI1CopiesPass(*PR);
Matt Arsenault782c03b2015-11-03 22:30:13 +000070 initializeSIFixSGPRCopiesPass(*PR);
Matt Arsenault8c0ef8b2015-10-12 17:43:59 +000071 initializeSIFoldOperandsPass(*PR);
Matt Arsenaultc3a01ec2016-06-09 23:18:47 +000072 initializeSIShrinkInstructionsPass(*PR);
Matt Arsenault187276f2015-10-07 00:42:53 +000073 initializeSIFixControlFlowLiveIntervalsPass(*PR);
74 initializeSILoadStoreOptimizerPass(*PR);
Matt Arsenault39319482015-11-06 18:01:57 +000075 initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
Tom Stellarda6f24c62015-12-15 20:55:55 +000076 initializeAMDGPUAnnotateUniformValuesPass(*PR);
Matt Arsenaulte0132462016-01-30 05:19:45 +000077 initializeAMDGPUPromoteAllocaPass(*PR);
Matt Arsenault86de4862016-06-24 07:07:55 +000078 initializeAMDGPUCodeGenPreparePass(*PR);
Tom Stellard77a17772016-01-20 15:48:27 +000079 initializeSIAnnotateControlFlowPass(*PR);
Tom Stellard6e1967e2016-02-05 17:42:38 +000080 initializeSIInsertWaitsPass(*PR);
Nicolai Haehnle213e87f2016-03-21 20:28:33 +000081 initializeSIWholeQuadModePass(*PR);
Matt Arsenault55d49cf2016-02-12 02:16:10 +000082 initializeSILowerControlFlowPass(*PR);
Matt Arsenaultd3e4c642016-06-02 00:04:22 +000083 initializeSIDebuggerInsertNopsPass(*PR);
Tom Stellard45bb48e2015-06-13 03:28:10 +000084}
85
Tom Stellarde135ffd2015-09-25 21:41:28 +000086static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
Tom Stellardc93fc112015-12-10 02:13:01 +000087 return make_unique<AMDGPUTargetObjectFile>();
Tom Stellarde135ffd2015-09-25 21:41:28 +000088}
89
Tom Stellard45bb48e2015-06-13 03:28:10 +000090static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
91 return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>());
92}
93
/// Factory for SI's custom machine scheduler (SIScheduleDAGMI).
static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
  return new SIScheduleDAGMI(C);
}
97
// Make the custom schedulers selectable via -misched=r600 / -misched=si.
static MachineSchedRegistry
R600SchedRegistry("r600", "Run R600's custom scheduler",
                  createR600MachineScheduler);

static MachineSchedRegistry
SISchedRegistry("si", "Run SI's custom scheduler",
                createSIMachineScheduler);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000105
Matt Arsenaultec30eb52016-05-31 16:57:45 +0000106static StringRef computeDataLayout(const Triple &TT) {
107 if (TT.getArch() == Triple::r600) {
108 // 32-bit pointers.
109 return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
110 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
Tom Stellard45bb48e2015-06-13 03:28:10 +0000111 }
112
Matt Arsenaultec30eb52016-05-31 16:57:45 +0000113 // 32-bit private, local, and region pointers. 64-bit global, constant and
114 // flat.
115 return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
116 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
117 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
Tom Stellard45bb48e2015-06-13 03:28:10 +0000118}
119
Matt Arsenaultb22828f2016-01-27 02:17:49 +0000120LLVM_READNONE
121static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
122 if (!GPU.empty())
123 return GPU;
124
125 // HSA only supports CI+, so change the default GPU to a CI for HSA.
126 if (TT.getArch() == Triple::amdgcn)
127 return (TT.getOS() == Triple::AMDHSA) ? "kaveri" : "tahiti";
128
Matt Arsenault8e001942016-06-02 18:37:16 +0000129 return "r600";
Matt Arsenaultb22828f2016-01-27 02:17:49 +0000130}
131
/// The requested relocation model is intentionally ignored.
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
  // The AMDGPU toolchain only supports generating shared objects, so we
  // must always use PIC.
  return Reloc::PIC_;
}
137
/// Common base constructor for R600 and GCN target machines. Resolves the
/// data layout, default CPU, and relocation model before delegating to
/// LLVMTargetMachine.
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                                         StringRef CPU, StringRef FS,
                                         TargetOptions Options,
                                         Optional<Reloc::Model> RM,
                                         CodeModel::Model CM,
                                         CodeGenOpt::Level OptLevel)
  : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
                      FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
    TLOF(createTLOF(getTargetTriple())),
    IntrinsicInfo() {
  // AMDGPU requires structured control flow to be preserved in codegen.
  setRequiresStructuredCFG(true);
  initAsmInfo();
}
151
Tom Stellarde135ffd2015-09-25 21:41:28 +0000152AMDGPUTargetMachine::~AMDGPUTargetMachine() { }
Tom Stellard45bb48e2015-06-13 03:28:10 +0000153
Matt Arsenault59c0ffa2016-06-27 20:48:03 +0000154StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
155 Attribute GPUAttr = F.getFnAttribute("target-cpu");
156 return GPUAttr.hasAttribute(Attribute::None) ?
157 getTargetCPU() : GPUAttr.getValueAsString();
158}
159
160StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
161 Attribute FSAttr = F.getFnAttribute("target-features");
162
163 return FSAttr.hasAttribute(Attribute::None) ?
164 getTargetFeatureString() :
165 FSAttr.getValueAsString();
166}
167
Tom Stellard45bb48e2015-06-13 03:28:10 +0000168//===----------------------------------------------------------------------===//
169// R600 Target Machine (R600 -> Cayman)
170//===----------------------------------------------------------------------===//
171
/// R600 target machine; all the work is done in the AMDGPU base class.
R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
                                     StringRef CPU, StringRef FS,
                                     TargetOptions Options,
                                     Optional<Reloc::Model> RM,
                                     CodeModel::Model CM, CodeGenOpt::Level OL)
  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
178
179const R600Subtarget *R600TargetMachine::getSubtargetImpl(
180 const Function &F) const {
181 StringRef GPU = getGPUName(F);
182 StringRef FS = getFeatureString(F);
183
184 SmallString<128> SubtargetKey(GPU);
185 SubtargetKey.append(FS);
186
187 auto &I = SubtargetMap[SubtargetKey];
188 if (!I) {
189 // This needs to be done before we create a new subtarget since any
190 // creation will depend on the TM and the code generation flags on the
191 // function that reside in TargetOptions.
192 resetTargetOptions(F);
193 I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
194 }
195
196 return I.get();
197}
Tom Stellard45bb48e2015-06-13 03:28:10 +0000198
199//===----------------------------------------------------------------------===//
200// GCN Target Machine (SI+)
201//===----------------------------------------------------------------------===//
202
#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {
// GlobalISel accessor for SI: owns the call-lowering implementation and
// hands out a non-owning pointer to it. Only compiled when GlobalISel is
// built into LLVM.
struct SIGISelActualAccessor : public GISelAccessor {
  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
  const AMDGPUCallLowering *getCallLowering() const override {
    return CallLoweringInfo.get();
  }
};
} // End anonymous namespace.
#endif
213
/// GCN (SI+) target machine; all the work is done in the AMDGPU base class.
GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
                                   StringRef CPU, StringRef FS,
                                   TargetOptions Options,
                                   Optional<Reloc::Model> RM,
                                   CodeModel::Model CM, CodeGenOpt::Level OL)
  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
220
/// Return (creating and caching on first use) the SI subtarget for \p F,
/// keyed by the function's GPU name plus feature string. Newly created
/// subtargets also get a GISelAccessor installed.
const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
  StringRef GPU = getGPUName(F);
  StringRef FS = getFeatureString(F);

  SmallString<128> SubtargetKey(GPU);
  SubtargetKey.append(FS);

  auto &I = SubtargetMap[SubtargetKey];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);

#ifndef LLVM_BUILD_GLOBAL_ISEL
    // Without GlobalISel, install a stub accessor.
    GISelAccessor *GISel = new GISelAccessor();
#else
    SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
    GISel->CallLoweringInfo.reset(
      new AMDGPUCallLowering(*I->getTargetLowering()));
#endif

    // NOTE(review): GISel is heap-allocated and handed to the subtarget by
    // reference; presumably the subtarget takes ownership — confirm in
    // setGISelAccessor.
    I->setGISelAccessor(*GISel);
  }

  return I.get();
}
Tom Stellard45bb48e2015-06-13 03:28:10 +0000249
250//===----------------------------------------------------------------------===//
251// AMDGPU Pass Setup
252//===----------------------------------------------------------------------===//
253
254namespace {
Tom Stellardcc7067a62016-03-03 03:53:29 +0000255
/// Pass configuration shared by the R600 and GCN backends.
class AMDGPUPassConfig : public TargetPassConfig {
public:
  AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {

    // Exceptions and StackMaps are not supported, so these passes will never do
    // anything.
    disablePass(&StackMapLivenessID);
    disablePass(&FuncletLayoutID);
  }

  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
    return getTM<AMDGPUTargetMachine>();
  }

  // Helpers used by addIRPasses in this and derived configs.
  void addEarlyCSEOrGVNPass();
  void addStraightLineScalarOptimizationPasses();
  void addIRPasses() override;
  void addCodeGenPrepare() override;
  bool addPreISel() override;
  bool addInstSelector() override;
  bool addGCPasses() override;
};
279
/// Pass configuration for the R600 (pre-SI) backend.
class R600PassConfig final : public AMDGPUPassConfig {
public:
  R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) { }

  // R600 always uses its custom scheduler.
  ScheduleDAGInstrs *createMachineScheduler(
    MachineSchedContext *C) const override {
    return createR600MachineScheduler(C);
  }

  bool addPreISel() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};
295
/// Pass configuration for the GCN (SI+) backend.
class GCNPassConfig final : public AMDGPUPassConfig {
public:
  GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) { }

  GCNTargetMachine &getGCNTargetMachine() const {
    return getTM<GCNTargetMachine>();
  }

  // May return the SI scheduler or nullptr (default scheduler); see the
  // definition below.
  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override;

  void addIRPasses() override;
  bool addPreISel() override;
  void addMachineSSAOptimization() override;
  bool addInstSelector() override;
#ifdef LLVM_BUILD_GLOBAL_ISEL
  // GlobalISel hooks; only compiled when GlobalISel is built into LLVM.
  bool addIRTranslator() override;
  bool addLegalizeMachineIR() override;
  bool addRegBankSelect() override;
  bool addGlobalInstructionSelect() override;
#endif
  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};
324
325} // End of anonymous namespace
326
/// Return a TargetIRAnalysis that builds an AMDGPU TTI per function.
TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
  return TargetIRAnalysis([this](const Function &F) {
    return TargetTransformInfo(AMDGPUTTIImpl(this, F));
  });
}
332
Matt Arsenaultf42c6922016-06-15 00:11:01 +0000333void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
334 if (getOptLevel() == CodeGenOpt::Aggressive)
335 addPass(createGVNPass());
336 else
337 addPass(createEarlyCSEPass());
338}
339
/// Straight-line scalar optimizations: SLSR and friends plus the CSE/GVN
/// cleanups they rely on. Pass order here is deliberate.
void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
  addPass(createSeparateConstOffsetFromGEPPass());
  addPass(createSpeculativeExecutionPass());
  // ReassociateGEPs exposes more opportunites for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
  // EarlyCSE can reuse.
  addEarlyCSEOrGVNPass();
  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(createNaryReassociatePass());
  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(createEarlyCSEPass());
}
355
Tom Stellard45bb48e2015-06-13 03:28:10 +0000356void AMDGPUPassConfig::addIRPasses() {
Matt Arsenaultbde80342016-05-18 15:41:07 +0000357 // There is no reason to run these.
358 disablePass(&StackMapLivenessID);
359 disablePass(&FuncletLayoutID);
360 disablePass(&PatchableFunctionID);
361
Tom Stellard45bb48e2015-06-13 03:28:10 +0000362 // Function calls are not supported, so make sure we inline everything.
363 addPass(createAMDGPUAlwaysInlinePass());
Chandler Carruth67fc52f2016-08-17 02:56:20 +0000364 addPass(createAlwaysInlinerLegacyPass());
Tom Stellard45bb48e2015-06-13 03:28:10 +0000365 // We need to add the barrier noop pass, otherwise adding the function
366 // inlining pass will cause all of the PassConfigs passes to be run
367 // one function at a time, which means if we have a nodule with two
368 // functions, then we will generate code for the first function
369 // without ever running any passes on the second.
370 addPass(createBarrierNoopPass());
Matt Arsenault39319482015-11-06 18:01:57 +0000371
Tom Stellardfd253952015-08-07 23:19:30 +0000372 // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
373 addPass(createAMDGPUOpenCLImageTypeLoweringPass());
Matt Arsenault39319482015-11-06 18:01:57 +0000374
Matt Arsenaulte0132462016-01-30 05:19:45 +0000375 const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
Matt Arsenault03d85842016-06-27 20:32:13 +0000376 if (TM.getOptLevel() > CodeGenOpt::None) {
Matt Arsenaulte0132462016-01-30 05:19:45 +0000377 addPass(createAMDGPUPromoteAlloca(&TM));
Matt Arsenault03d85842016-06-27 20:32:13 +0000378
379 if (EnableSROA)
380 addPass(createSROAPass());
Tom Stellard45bb48e2015-06-13 03:28:10 +0000381 }
Matt Arsenaultf42c6922016-06-15 00:11:01 +0000382
383 addStraightLineScalarOptimizationPasses();
384
385 TargetPassConfig::addIRPasses();
386
387 // EarlyCSE is not always strong enough to clean up what LSR produces. For
388 // example, GVN can combine
389 //
390 // %0 = add %a, %b
391 // %1 = add %b, %a
392 //
393 // and
394 //
395 // %0 = shl nsw %a, 2
396 // %1 = shl %a, 2
397 //
398 // but EarlyCSE can do neither of them.
399 if (getOptLevel() != CodeGenOpt::None)
400 addEarlyCSEOrGVNPass();
Tom Stellard45bb48e2015-06-13 03:28:10 +0000401}
402
/// Run the default CodeGenPrepare, then optionally the load/store vectorizer
/// (off by default; see the -amdgpu-load-store-vectorizer option above).
void AMDGPUPassConfig::addCodeGenPrepare() {
  TargetPassConfig::addCodeGenPrepare();

  if (EnableLoadStoreVectorizer)
    addPass(createLoadStoreVectorizerPass());
}
409
/// Common pre-instruction-selection IR pass: flatten the CFG.
bool AMDGPUPassConfig::addPreISel() {
  addPass(createFlattenCFGPass());
  return false;
}
414
/// Install the AMDGPU SelectionDAG instruction selector.
bool AMDGPUPassConfig::addInstSelector() {
  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
  return false;
}
419
bool AMDGPUPassConfig::addGCPasses() {
  // Do nothing. GC is not supported.
  return false;
}
424
Tom Stellard45bb48e2015-06-13 03:28:10 +0000425//===----------------------------------------------------------------------===//
426// R600 Pass Setup
427//===----------------------------------------------------------------------===//
428
/// R600 pre-ISel: common AMDGPU passes plus (optionally) CFG structurization.
bool R600PassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  if (EnableR600StructurizeCFG)
    addPass(createStructurizeCFGPass());
  return false;
}
436
/// Before register allocation, merge R600 vector registers.
void R600PassConfig::addPreRegAlloc() {
  addPass(createR600VectorRegMerger(*TM));
}
440
/// Post-RA, pre-sched2 passes: clause markers, optional if-conversion, and
/// clause merging. The `false` arguments suppress pass verification.
void R600PassConfig::addPreSched2() {
  addPass(createR600EmitClauseMarkers(), false);
  if (EnableR600IfConvert)
    addPass(&IfConverterID, false);
  addPass(createR600ClauseMergePass(*TM), false);
}
447
/// Final R600 lowering before emission: CFG structurization, special-instr
/// expansion, bundling, packetizing, and control-flow finalization — in that
/// order. The `false` arguments suppress pass verification.
void R600PassConfig::addPreEmitPass() {
  addPass(createAMDGPUCFGStructurizerPass(), false);
  addPass(createR600ExpandSpecialInstrsPass(*TM), false);
  addPass(&FinalizeMachineBundlesID, false);
  addPass(createR600Packetizer(*TM), false);
  addPass(createR600ControlFlowFinalizer(*TM), false);
}
455
/// Create the R600 pass configuration; the PassConfig is owned by the caller.
TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new R600PassConfig(this, PM);
}
459
460//===----------------------------------------------------------------------===//
461// GCN Pass Setup
462//===----------------------------------------------------------------------===//
463
Matt Arsenault03d85842016-06-27 20:32:13 +0000464ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
465 MachineSchedContext *C) const {
466 const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
467 if (ST.enableSIScheduler())
468 return createSIMachineScheduler(C);
469 return nullptr;
470}
471
/// GCN pre-ISel IR pipeline: annotate kernels/uniform values and structurize
/// control flow for SI. Pass order here is deliberate.
bool GCNPassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  // FIXME: We need to run a pass to propagate the attributes when calls are
  // supported.
  addPass(&AMDGPUAnnotateKernelFeaturesID);
  addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
  addPass(createSinkingPass());
  addPass(createSITypeRewriter());
  addPass(createAMDGPUAnnotateUniformValues());
  addPass(createSIAnnotateControlFlowPass());

  return false;
}
486
/// Run the default machine-SSA optimizations, then SI operand folding and a
/// dead-instruction cleanup.
void GCNPassConfig::addMachineSSAOptimization() {
  TargetPassConfig::addMachineSSAOptimization();

  // We want to fold operands after PeepholeOptimizer has run (or as part of
  // it), because it will eliminate extra copies making it easier to fold the
  // real source operand. We want to eliminate dead instructions after, so that
  // we see fewer uses of the copies. We then need to clean up the dead
  // instructions leftover after the operands are folded as well.
  //
  // XXX - Can we get away without running DeadMachineInstructionElim again?
  addPass(&SIFoldOperandsID);
  addPass(&DeadMachineInstructionElimID);
}
500
/// GCN IR passes: run AMDGPUCodeGenPrepare first, then the common pipeline.
void GCNPassConfig::addIRPasses() {
  // TODO: May want to move later or split into an early and late one.
  addPass(createAMDGPUCodeGenPreparePass(&getGCNTargetMachine()));

  AMDGPUPassConfig::addIRPasses();
}
507
/// After the common instruction selector, lower i1 copies and fix SGPR
/// copies that selection may have introduced.
bool GCNPassConfig::addInstSelector() {
  AMDGPUPassConfig::addInstSelector();
  addPass(createSILowerI1CopiesPass());
  addPass(&SIFixSGPRCopiesID);
  return false;
}
514
#ifdef LLVM_BUILD_GLOBAL_ISEL
// GlobalISel pipeline hooks. Only the IR translator is installed; the
// remaining stages are not implemented for AMDGPU yet and return false
// without adding passes.
bool GCNPassConfig::addIRTranslator() {
  addPass(new IRTranslator());
  return false;
}

bool GCNPassConfig::addLegalizeMachineIR() {
  return false;
}

bool GCNPassConfig::addRegBankSelect() {
  return false;
}

bool GCNPassConfig::addGlobalInstructionSelect() {
  return false;
}
#endif
533
Tom Stellard45bb48e2015-06-13 03:28:10 +0000534void GCNPassConfig::addPreRegAlloc() {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000535 // This needs to be run directly before register allocation because
536 // earlier passes might recompute live intervals.
537 // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
538 if (getOptLevel() > CodeGenOpt::None) {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000539 insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
540 }
541
Matt Arsenault03d85842016-06-27 20:32:13 +0000542 if (getOptLevel() > CodeGenOpt::None) {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000543 // Don't do this with no optimizations since it throws away debug info by
544 // merging nonadjacent loads.
545
546 // This should be run after scheduling, but before register allocation. It
547 // also need extra copies to the address operand to be eliminated.
Matt Arsenault03d85842016-06-27 20:32:13 +0000548
549 // FIXME: Move pre-RA and remove extra reg coalescer run.
Tom Stellard45bb48e2015-06-13 03:28:10 +0000550 insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
Matt Arsenault84db5d92015-07-14 17:57:36 +0000551 insertPass(&MachineSchedulerID, &RegisterCoalescerID);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000552 }
Matt Arsenault03d85842016-06-27 20:32:13 +0000553
Matt Arsenault4a07bf62016-06-22 20:26:24 +0000554 addPass(createSIShrinkInstructionsPass());
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000555 addPass(createSIWholeQuadModePass());
Matt Arsenaultb87fc222015-10-01 22:10:03 +0000556}
557
// Currently just forwards to the default fast register allocator setup;
// kept as an explicit hook for future SI-specific insertions.
void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
  TargetPassConfig::addFastRegAlloc(RegAllocPass);
}
561
// Currently just forwards to the default optimized register allocator setup;
// kept as an explicit hook for future SI-specific insertions.
void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}
565
// Intentionally empty: GCN adds nothing before the second scheduling pass.
void GCNPassConfig::addPreSched2() {
}
568
/// Final SI lowering before emission: hazard handling, wait insertion,
/// instruction shrinking, control-flow lowering, and debugger nops.
void GCNPassConfig::addPreEmitPass() {
  // The hazard recognizer that runs as part of the post-ra scheduler does not
  // guarantee to be able handle all hazards correctly. This is because if there
  // are multiple scheduling regions in a basic block, the regions are scheduled
  // bottom up, so when we begin to schedule a region we don't know what
  // instructions were emitted directly before it.
  //
  // Here we add a stand-alone hazard recognizer pass which can handle all
  // cases.
  addPass(&PostRAHazardRecognizerID);

  addPass(createSIInsertWaitsPass());
  addPass(createSIShrinkInstructionsPass());
  addPass(createSILowerControlFlowPass());
  addPass(createSIDebuggerInsertNopsPass());
}
585
/// Create the GCN pass configuration; the PassConfig is owned by the caller.
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new GCNPassConfig(this, PM);
}