blob: 4e016525f7e4e6bc1fea1dad2b68c2b58904541b [file] [log] [blame]
//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//
12
Quentin Colombet7a43edd2017-05-27 01:34:07 +000013#include "AArch64TargetMachine.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000014#include "AArch64.h"
Evandro Menezes94edf022017-02-01 02:54:34 +000015#include "AArch64MacroFusion.h"
Eugene Zelenko049b0172017-01-06 00:30:53 +000016#include "AArch64Subtarget.h"
Aditya Nandakumara2719322014-11-13 09:26:31 +000017#include "AArch64TargetObjectFile.h"
Chandler Carruth93dcdc42015-01-31 11:17:59 +000018#include "AArch64TargetTransformInfo.h"
Eugene Zelenko049b0172017-01-06 00:30:53 +000019#include "MCTargetDesc/AArch64MCTargetDesc.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Triple.h"
22#include "llvm/Analysis/TargetTransformInfo.h"
Quentin Colombet846219a2016-04-07 21:24:40 +000023#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +000024#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
Tim Northover69fa84a2016-10-14 22:18:18 +000025#include "llvm/CodeGen/GlobalISel/Legalizer.h"
Quentin Colombet7a43edd2017-05-27 01:34:07 +000026#include "llvm/CodeGen/GlobalISel/Localizer.h"
Quentin Colombetd4131812016-04-07 20:27:33 +000027#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
Matthias Braun115efcd2016-11-28 20:11:54 +000028#include "llvm/CodeGen/MachineScheduler.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000029#include "llvm/CodeGen/Passes.h"
Matthias Braun31d19d42016-05-10 03:21:59 +000030#include "llvm/CodeGen/TargetPassConfig.h"
Eugene Zelenko049b0172017-01-06 00:30:53 +000031#include "llvm/IR/Attributes.h"
Eric Christopher3faf2f12014-10-06 06:45:36 +000032#include "llvm/IR/Function.h"
Eli Friedman0917d0c2018-11-07 22:30:56 +000033#include "llvm/MC/MCAsmInfo.h"
Eugene Zelenko049b0172017-01-06 00:30:53 +000034#include "llvm/MC/MCTargetOptions.h"
35#include "llvm/Pass.h"
36#include "llvm/Support/CodeGen.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000037#include "llvm/Support/CommandLine.h"
38#include "llvm/Support/TargetRegistry.h"
David Blaikie6054e652018-03-23 23:58:19 +000039#include "llvm/Target/TargetLoweringObjectFile.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000040#include "llvm/Target/TargetOptions.h"
41#include "llvm/Transforms/Scalar.h"
Eugene Zelenko049b0172017-01-06 00:30:53 +000042#include <memory>
43#include <string>
44
Tim Northover3b0846e2014-05-24 12:50:23 +000045using namespace llvm;
46
Diana Picus850043b2016-08-01 05:56:57 +000047static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp",
48 cl::desc("Enable the CCMP formation pass"),
49 cl::init(true), cl::Hidden);
Tim Northover3b0846e2014-05-24 12:50:23 +000050
Chad Rosier6db9ff62017-06-23 19:20:12 +000051static cl::opt<bool>
52 EnableCondBrTuning("aarch64-enable-cond-br-tune",
53 cl::desc("Enable the conditional branch tuning pass"),
54 cl::init(true), cl::Hidden);
55
Diana Picus850043b2016-08-01 05:56:57 +000056static cl::opt<bool> EnableMCR("aarch64-enable-mcr",
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +000057 cl::desc("Enable the machine combiner pass"),
58 cl::init(true), cl::Hidden);
59
Diana Picus850043b2016-08-01 05:56:57 +000060static cl::opt<bool> EnableStPairSuppress("aarch64-enable-stp-suppress",
61 cl::desc("Suppress STP for AArch64"),
62 cl::init(true), cl::Hidden);
63
64static cl::opt<bool> EnableAdvSIMDScalar(
65 "aarch64-enable-simd-scalar",
66 cl::desc("Enable use of AdvSIMD scalar integer instructions"),
67 cl::init(false), cl::Hidden);
Tim Northover3b0846e2014-05-24 12:50:23 +000068
69static cl::opt<bool>
Diana Picus850043b2016-08-01 05:56:57 +000070 EnablePromoteConstant("aarch64-enable-promote-const",
71 cl::desc("Enable the promote constant pass"),
72 cl::init(true), cl::Hidden);
73
74static cl::opt<bool> EnableCollectLOH(
75 "aarch64-enable-collect-loh",
76 cl::desc("Enable the pass that emits the linker optimization hints (LOH)"),
77 cl::init(true), cl::Hidden);
Tim Northover3b0846e2014-05-24 12:50:23 +000078
79static cl::opt<bool>
Diana Picus850043b2016-08-01 05:56:57 +000080 EnableDeadRegisterElimination("aarch64-enable-dead-defs", cl::Hidden,
81 cl::desc("Enable the pass that removes dead"
82 " definitons and replaces stores to"
83 " them with stores to the zero"
84 " register"),
85 cl::init(true));
Tim Northover3b0846e2014-05-24 12:50:23 +000086
Diana Picus850043b2016-08-01 05:56:57 +000087static cl::opt<bool> EnableRedundantCopyElimination(
88 "aarch64-enable-copyelim",
89 cl::desc("Enable the redundant copy elimination pass"), cl::init(true),
90 cl::Hidden);
Tim Northover3b0846e2014-05-24 12:50:23 +000091
Diana Picus850043b2016-08-01 05:56:57 +000092static cl::opt<bool> EnableLoadStoreOpt("aarch64-enable-ldst-opt",
93 cl::desc("Enable the load/store pair"
94 " optimization pass"),
95 cl::init(true), cl::Hidden);
Tim Northover3b0846e2014-05-24 12:50:23 +000096
Diana Picus850043b2016-08-01 05:56:57 +000097static cl::opt<bool> EnableAtomicTidy(
98 "aarch64-enable-atomic-cfg-tidy", cl::Hidden,
99 cl::desc("Run SimplifyCFG after expanding atomic operations"
100 " to make use of cmpxchg flow-based information"),
101 cl::init(true));
Tim Northoverb4ddc082014-05-30 10:09:59 +0000102
James Molloy99917942014-08-06 13:31:32 +0000103static cl::opt<bool>
104EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden,
105 cl::desc("Run early if-conversion"),
106 cl::init(true));
107
Jiangning Liu1a486da2014-09-05 02:55:24 +0000108static cl::opt<bool>
Diana Picus850043b2016-08-01 05:56:57 +0000109 EnableCondOpt("aarch64-enable-condopt",
110 cl::desc("Enable the condition optimizer pass"),
111 cl::init(true), cl::Hidden);
Jiangning Liu1a486da2014-09-05 02:55:24 +0000112
Arnaud A. de Grandmaisonc75dbbb2014-09-10 14:06:10 +0000113static cl::opt<bool>
Bradley Smithf2a801d2014-10-13 10:12:35 +0000114EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
115 cl::desc("Work around Cortex-A53 erratum 835769"),
116 cl::init(false));
117
Hao Liufd46bea2014-11-19 06:39:53 +0000118static cl::opt<bool>
Diana Picus850043b2016-08-01 05:56:57 +0000119 EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
120 cl::desc("Enable optimizations on complex GEPs"),
121 cl::init(false));
122
123static cl::opt<bool>
124 BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
125 cl::desc("Relax out of range conditional branches"));
Hao Liufd46bea2014-11-19 06:39:53 +0000126
Tim Northover1c353412018-10-24 20:19:09 +0000127static cl::opt<bool> EnableCompressJumpTables(
128 "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(true),
129 cl::desc("Use smallest entry possible for jump tables"));
130
Ahmed Bougachab96444e2015-04-11 00:06:36 +0000131// FIXME: Unify control over GlobalMerge.
132static cl::opt<cl::boolOrDefault>
Diana Picus850043b2016-08-01 05:56:57 +0000133 EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
134 cl::desc("Enable the global merge pass"));
Ahmed Bougachab96444e2015-04-11 00:06:36 +0000135
Adam Nemet53e758f2016-03-18 00:27:29 +0000136static cl::opt<bool>
Diana Picus850043b2016-08-01 05:56:57 +0000137 EnableLoopDataPrefetch("aarch64-enable-loop-data-prefetch", cl::Hidden,
Adam Nemet53e758f2016-03-18 00:27:29 +0000138 cl::desc("Enable the loop data prefetch pass"),
Adam Nemetfb8fbba52016-03-30 00:21:29 +0000139 cl::init(true));
Adam Nemet53e758f2016-03-18 00:27:29 +0000140
Ahmed Bougacha120ae222017-03-01 23:33:08 +0000141static cl::opt<int> EnableGlobalISelAtO(
142 "aarch64-enable-global-isel-at-O", cl::Hidden,
143 cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
Amara Emerson854d10d2018-01-02 16:30:47 +0000144 cl::init(0));
Ahmed Bougacha120ae222017-03-01 23:33:08 +0000145
Geoff Berryb1e87142017-07-14 21:44:12 +0000146static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
147 cl::init(true), cl::Hidden);
148
Oliver Stannard250e5a52018-10-08 14:04:24 +0000149static cl::opt<bool>
150 EnableBranchTargets("aarch64-enable-branch-targets", cl::Hidden,
151 cl::desc("Enable the AAcrh64 branch target pass"),
152 cl::init(true));
153
Tim Northover3b0846e2014-05-24 12:50:23 +0000154extern "C" void LLVMInitializeAArch64Target() {
155 // Register the target.
Mehdi Aminif42454b2016-10-09 23:00:34 +0000156 RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
157 RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget());
158 RegisterTargetMachine<AArch64leTargetMachine> Z(getTheARM64Target());
Tim Northover5dad9df2016-04-01 23:14:52 +0000159 auto PR = PassRegistry::getPassRegistry();
160 initializeGlobalISel(*PR);
Diana Picus850043b2016-08-01 05:56:57 +0000161 initializeAArch64A53Fix835769Pass(*PR);
162 initializeAArch64A57FPLoadBalancingPass(*PR);
Diana Picus850043b2016-08-01 05:56:57 +0000163 initializeAArch64AdvSIMDScalarPass(*PR);
Oliver Stannard250e5a52018-10-08 14:04:24 +0000164 initializeAArch64BranchTargetsPass(*PR);
Diana Picus850043b2016-08-01 05:56:57 +0000165 initializeAArch64CollectLOHPass(*PR);
Tim Northover1c353412018-10-24 20:19:09 +0000166 initializeAArch64CompressJumpTablesPass(*PR);
Diana Picus850043b2016-08-01 05:56:57 +0000167 initializeAArch64ConditionalComparesPass(*PR);
168 initializeAArch64ConditionOptimizerPass(*PR);
169 initializeAArch64DeadRegisterDefinitionsPass(*PR);
Tim Northover5dad9df2016-04-01 23:14:52 +0000170 initializeAArch64ExpandPseudoPass(*PR);
Geoff Berry24c81e82016-07-20 21:45:58 +0000171 initializeAArch64LoadStoreOptPass(*PR);
Abderrazek Zaafrani2c80e4c2017-12-08 00:58:49 +0000172 initializeAArch64SIMDInstrOptPass(*PR);
Daniel Sandersc973ad12018-10-03 02:12:17 +0000173 initializeAArch64PreLegalizerCombinerPass(*PR);
Diana Picus850043b2016-08-01 05:56:57 +0000174 initializeAArch64PromoteConstantPass(*PR);
175 initializeAArch64RedundantCopyEliminationPass(*PR);
176 initializeAArch64StorePairSuppressPass(*PR);
Geoff Berry9962fae2017-07-18 16:14:22 +0000177 initializeFalkorHWPFFixPass(*PR);
Geoff Berryb1e87142017-07-14 21:44:12 +0000178 initializeFalkorMarkStridedAccessesLegacyPass(*PR);
Diana Picus850043b2016-08-01 05:56:57 +0000179 initializeLDTLSCleanupPass(*PR);
Kristof Beylse66bc1f2018-12-18 08:50:02 +0000180 initializeAArch64SpeculationHardeningPass(*PR);
Tim Northover3b0846e2014-05-24 12:50:23 +0000181}
182
Aditya Nandakumara2719322014-11-13 09:26:31 +0000183//===----------------------------------------------------------------------===//
184// AArch64 Lowering public interface.
185//===----------------------------------------------------------------------===//
186static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
187 if (TT.isOSBinFormatMachO())
Eugene Zelenko049b0172017-01-06 00:30:53 +0000188 return llvm::make_unique<AArch64_MachoTargetObjectFile>();
Mandeep Singh Grang0c721722017-06-27 23:58:19 +0000189 if (TT.isOSBinFormatCOFF())
190 return llvm::make_unique<AArch64_COFFTargetObjectFile>();
Aditya Nandakumara2719322014-11-13 09:26:31 +0000191
Eugene Zelenko049b0172017-01-06 00:30:53 +0000192 return llvm::make_unique<AArch64_ELFTargetObjectFile>();
Aditya Nandakumara2719322014-11-13 09:26:31 +0000193}
194
Mehdi Amini93e1ea12015-03-12 00:07:24 +0000195// Helper function to build a DataLayout string
Joel Jones504bf332016-10-24 13:37:13 +0000196static std::string computeDataLayout(const Triple &TT,
197 const MCTargetOptions &Options,
198 bool LittleEndian) {
199 if (Options.getABIName() == "ilp32")
200 return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128";
Daniel Sandersed64d622015-06-11 15:34:59 +0000201 if (TT.isOSBinFormatMachO())
Mehdi Amini93e1ea12015-03-12 00:07:24 +0000202 return "e-m:o-i64:64-i128:128-n32:64-S128";
Mandeep Singh Grang0c721722017-06-27 23:58:19 +0000203 if (TT.isOSBinFormatCOFF())
Mandeep Singh Grang6d6f2fa2017-07-17 21:25:19 +0000204 return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128";
Mehdi Amini93e1ea12015-03-12 00:07:24 +0000205 if (LittleEndian)
Chad Rosier112d0e92016-07-07 20:02:18 +0000206 return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
207 return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
Mehdi Amini93e1ea12015-03-12 00:07:24 +0000208}
209
Rafael Espindola8c34dd82016-05-18 22:04:49 +0000210static Reloc::Model getEffectiveRelocModel(const Triple &TT,
211 Optional<Reloc::Model> RM) {
212 // AArch64 Darwin is always PIC.
213 if (TT.isOSDarwin())
214 return Reloc::PIC_;
215 // On ELF platforms the default static relocation model has a smart enough
216 // linker to cope with referencing external symbols defined in a shared
217 // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
218 if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC)
219 return Reloc::Static;
220 return *RM;
221}
222
David Greenca29c272018-12-07 12:10:23 +0000223static CodeModel::Model
224getEffectiveAArch64CodeModel(const Triple &TT, Optional<CodeModel::Model> CM,
225 bool JIT) {
Rafael Espindola79e238a2017-08-03 02:16:21 +0000226 if (CM) {
David Green9dd1d452018-08-22 11:31:39 +0000227 if (*CM != CodeModel::Small && *CM != CodeModel::Tiny &&
228 *CM != CodeModel::Large) {
Rafael Espindola79e238a2017-08-03 02:16:21 +0000229 if (!TT.isOSFuchsia())
230 report_fatal_error(
David Green9dd1d452018-08-22 11:31:39 +0000231 "Only small, tiny and large code models are allowed on AArch64");
232 else if (*CM != CodeModel::Kernel)
233 report_fatal_error("Only small, tiny, kernel, and large code models "
234 "are allowed on AArch64");
235 } else if (*CM == CodeModel::Tiny && !TT.isOSBinFormatELF())
236 report_fatal_error("tiny code model is only supported on ELF");
Rafael Espindola79e238a2017-08-03 02:16:21 +0000237 return *CM;
238 }
239 // The default MCJIT memory managers make no guarantees about where they can
240 // find an executable page; JITed code needs to be able to refer to globals
241 // no matter how far away they are.
242 if (JIT)
243 return CodeModel::Large;
244 return CodeModel::Small;
245}
246
Rafael Espindola38af4d62016-05-18 16:00:24 +0000247/// Create an AArch64 architecture model.
Tim Northover3b0846e2014-05-24 12:50:23 +0000248///
Rafael Espindola79e238a2017-08-03 02:16:21 +0000249AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
250 StringRef CPU, StringRef FS,
251 const TargetOptions &Options,
252 Optional<Reloc::Model> RM,
253 Optional<CodeModel::Model> CM,
254 CodeGenOpt::Level OL, bool JIT,
255 bool LittleEndian)
Matthias Braunbb8507e2017-10-12 22:57:28 +0000256 : LLVMTargetMachine(T,
257 computeDataLayout(TT, Options.MCOptions, LittleEndian),
258 TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM),
David Greenca29c272018-12-07 12:10:23 +0000259 getEffectiveAArch64CodeModel(TT, CM, JIT), OL),
Rafael Espindola79e238a2017-08-03 02:16:21 +0000260 TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000261 initAsmInfo();
Volkan Kelesa79b0622018-01-17 22:34:21 +0000262
Matthias Braunda5e7e12018-06-28 17:00:45 +0000263 if (TT.isOSBinFormatMachO()) {
Tim Northover271d3d22018-04-13 22:25:20 +0000264 this->Options.TrapUnreachable = true;
Matthias Braunda5e7e12018-06-28 17:00:45 +0000265 this->Options.NoTrapAfterNoreturn = true;
266 }
Tim Northover271d3d22018-04-13 22:25:20 +0000267
Eli Friedman0917d0c2018-11-07 22:30:56 +0000268 if (getMCAsmInfo()->usesWindowsCFI()) {
Eli Friedmand00fb2e2018-11-07 21:31:14 +0000269 // Unwinding can get confused if the last instruction in an
270 // exception-handling region (function, funclet, try block, etc.)
271 // is a call.
272 //
273 // FIXME: We could elide the trap if the next instruction would be in
274 // the same region anyway.
275 this->Options.TrapUnreachable = true;
276 }
277
Volkan Kelesa79b0622018-01-17 22:34:21 +0000278 // Enable GlobalISel at or below EnableGlobalISelAt0.
Petr Pavlue6406d52018-11-29 12:56:32 +0000279 if (getOptLevel() <= EnableGlobalISelAtO) {
Volkan Kelesa79b0622018-01-17 22:34:21 +0000280 setGlobalISel(true);
Petr Pavlue6406d52018-11-29 12:56:32 +0000281 setGlobalISelAbort(GlobalISelAbortMode::Disable);
282 }
Jessica Paquettedafa1982018-06-28 17:45:43 +0000283
284 // AArch64 supports the MachineOutliner.
285 setMachineOutliner(true);
Jessica Paquettef90edbe2018-07-27 20:18:27 +0000286
287 // AArch64 supports default outlining behaviour.
288 setSupportsDefaultOutlining(true);
Tim Northover3b0846e2014-05-24 12:50:23 +0000289}
290
Eugene Zelenko049b0172017-01-06 00:30:53 +0000291AArch64TargetMachine::~AArch64TargetMachine() = default;
Reid Kleckner357600e2014-11-20 23:37:18 +0000292
Eric Christopher3faf2f12014-10-06 06:45:36 +0000293const AArch64Subtarget *
294AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
Duncan P. N. Exon Smith003bb7d2015-02-14 02:09:06 +0000295 Attribute CPUAttr = F.getFnAttribute("target-cpu");
296 Attribute FSAttr = F.getFnAttribute("target-features");
Eric Christopher3faf2f12014-10-06 06:45:36 +0000297
298 std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
299 ? CPUAttr.getValueAsString().str()
300 : TargetCPU;
301 std::string FS = !FSAttr.hasAttribute(Attribute::None)
302 ? FSAttr.getValueAsString().str()
303 : TargetFS;
304
Daniel Sandersa1b2db792017-05-19 11:08:33 +0000305 auto &I = SubtargetMap[CPU + FS];
Eric Christopher3faf2f12014-10-06 06:45:36 +0000306 if (!I) {
307 // This needs to be done before we create a new subtarget since any
308 // creation will depend on the TM and the code generation flags on the
309 // function that reside in TargetOptions.
310 resetTargetOptions(F);
Daniel Sandersc81f4502015-06-16 15:44:21 +0000311 I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
Daniel Sandersa1b2db792017-05-19 11:08:33 +0000312 isLittle);
Eric Christopher3faf2f12014-10-06 06:45:36 +0000313 }
314 return I.get();
315}
316
Tim Northover3b0846e2014-05-24 12:50:23 +0000317void AArch64leTargetMachine::anchor() { }
318
Daniel Sanders3e5de882015-06-11 19:41:26 +0000319AArch64leTargetMachine::AArch64leTargetMachine(
320 const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
Rafael Espindola8c34dd82016-05-18 22:04:49 +0000321 const TargetOptions &Options, Optional<Reloc::Model> RM,
Rafael Espindola79e238a2017-08-03 02:16:21 +0000322 Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
323 : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {}
Tim Northover3b0846e2014-05-24 12:50:23 +0000324
325void AArch64beTargetMachine::anchor() { }
326
Daniel Sanders3e5de882015-06-11 19:41:26 +0000327AArch64beTargetMachine::AArch64beTargetMachine(
328 const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
Rafael Espindola8c34dd82016-05-18 22:04:49 +0000329 const TargetOptions &Options, Optional<Reloc::Model> RM,
Rafael Espindola79e238a2017-08-03 02:16:21 +0000330 Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
331 : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {}
Tim Northover3b0846e2014-05-24 12:50:23 +0000332
333namespace {
Eugene Zelenko049b0172017-01-06 00:30:53 +0000334
Tim Northover3b0846e2014-05-24 12:50:23 +0000335/// AArch64 Code Generator Pass Configuration Options.
336class AArch64PassConfig : public TargetPassConfig {
337public:
Matthias Braun5e394c32017-05-30 21:36:41 +0000338 AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM)
Chad Rosier486e0872014-09-12 17:40:39 +0000339 : TargetPassConfig(TM, PM) {
Matthias Braun5e394c32017-05-30 21:36:41 +0000340 if (TM.getOptLevel() != CodeGenOpt::None)
Chad Rosier347ed4e2014-09-12 22:17:28 +0000341 substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
Chad Rosier486e0872014-09-12 17:40:39 +0000342 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000343
344 AArch64TargetMachine &getAArch64TargetMachine() const {
345 return getTM<AArch64TargetMachine>();
346 }
347
Matthias Braun115efcd2016-11-28 20:11:54 +0000348 ScheduleDAGInstrs *
349 createMachineScheduler(MachineSchedContext *C) const override {
Florian Hahn15be1ac2017-07-12 21:41:28 +0000350 const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
Matthias Braun115efcd2016-11-28 20:11:54 +0000351 ScheduleDAGMILive *DAG = createGenericSchedLive(C);
352 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
353 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
Florian Hahn15be1ac2017-07-12 21:41:28 +0000354 if (ST.hasFusion())
355 DAG->addMutation(createAArch64MacroFusionDAGMutation());
Matthias Braun115efcd2016-11-28 20:11:54 +0000356 return DAG;
357 }
358
Evandro Menezes455382e2017-02-01 02:54:42 +0000359 ScheduleDAGInstrs *
360 createPostMachineScheduler(MachineSchedContext *C) const override {
361 const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
Florian Hahnf934add2017-07-12 20:53:22 +0000362 if (ST.hasFusion()) {
Evandro Menezes455382e2017-02-01 02:54:42 +0000363 // Run the Macro Fusion after RA again since literals are expanded from
364 // pseudos then (v. addPreSched2()).
365 ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
366 DAG->addMutation(createAArch64MacroFusionDAGMutation());
367 return DAG;
368 }
369
370 return nullptr;
371 }
372
Tim Northoverb4ddc082014-05-30 10:09:59 +0000373 void addIRPasses() override;
Tim Northover3b0846e2014-05-24 12:50:23 +0000374 bool addPreISel() override;
375 bool addInstSelector() override;
Quentin Colombetd96f4952016-02-11 19:35:06 +0000376 bool addIRTranslator() override;
Daniel Sandersc973ad12018-10-03 02:12:17 +0000377 void addPreLegalizeMachineIR() override;
Tim Northover33b07d62016-07-22 20:03:43 +0000378 bool addLegalizeMachineIR() override;
Quentin Colombetd4131812016-04-07 20:27:33 +0000379 bool addRegBankSelect() override;
Quentin Colombet7a43edd2017-05-27 01:34:07 +0000380 void addPreGlobalInstructionSelect() override;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +0000381 bool addGlobalInstructionSelect() override;
Tim Northover3b0846e2014-05-24 12:50:23 +0000382 bool addILPOpts() override;
Matthias Braun7e37a5f2014-12-11 21:26:47 +0000383 void addPreRegAlloc() override;
384 void addPostRegAlloc() override;
385 void addPreSched2() override;
386 void addPreEmitPass() override;
Tim Northover3b0846e2014-05-24 12:50:23 +0000387};
Eugene Zelenko049b0172017-01-06 00:30:53 +0000388
389} // end anonymous namespace
Tim Northover3b0846e2014-05-24 12:50:23 +0000390
Sanjoy Das26d11ca2017-12-22 18:21:59 +0000391TargetTransformInfo
392AArch64TargetMachine::getTargetTransformInfo(const Function &F) {
393 return TargetTransformInfo(AArch64TTIImpl(this, F));
Tim Northover3b0846e2014-05-24 12:50:23 +0000394}
395
396TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
Matthias Braun5e394c32017-05-30 21:36:41 +0000397 return new AArch64PassConfig(*this, PM);
Tim Northover3b0846e2014-05-24 12:50:23 +0000398}
399
Tim Northoverb4ddc082014-05-30 10:09:59 +0000400void AArch64PassConfig::addIRPasses() {
401 // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
402 // ourselves.
Francis Visoiu Mistrih8b617642017-05-18 17:21:13 +0000403 addPass(createAtomicExpandPass());
Tim Northoverb4ddc082014-05-30 10:09:59 +0000404
405 // Cmpxchg instructions are often used with a subsequent comparison to
406 // determine whether it succeeded. We can exploit existing control-flow in
407 // ldrex/strex loops to simplify this, but it needs tidying up.
408 if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
Sanjay Patel0ab0c1a2017-12-14 22:05:20 +0000409 addPass(createCFGSimplificationPass(1, true, true, false, true));
Tim Northoverb4ddc082014-05-30 10:09:59 +0000410
Junmo Park384d3762016-07-06 23:18:58 +0000411 // Run LoopDataPrefetch
Adam Nemet53e758f2016-03-18 00:27:29 +0000412 //
413 // Run this before LSR to remove the multiplies involved in computing the
414 // pointer values N iterations ahead.
Geoff Berryb1e87142017-07-14 21:44:12 +0000415 if (TM->getOptLevel() != CodeGenOpt::None) {
416 if (EnableLoopDataPrefetch)
417 addPass(createLoopDataPrefetchPass());
418 if (EnableFalkorHWPFFix)
419 addPass(createFalkorMarkStridedAccessesPass());
420 }
Adam Nemet53e758f2016-03-18 00:27:29 +0000421
Tim Northoverb4ddc082014-05-30 10:09:59 +0000422 TargetPassConfig::addIRPasses();
Hao Liufd46bea2014-11-19 06:39:53 +0000423
Hao Liu7ec8ee32015-06-26 02:32:07 +0000424 // Match interleaved memory accesses to ldN/stN intrinsics.
Martin Elshuberfef30362018-11-19 14:26:10 +0000425 if (TM->getOptLevel() != CodeGenOpt::None) {
426 addPass(createInterleavedLoadCombinePass());
Francis Visoiu Mistrih8b617642017-05-18 17:21:13 +0000427 addPass(createInterleavedAccessPass());
Martin Elshuberfef30362018-11-19 14:26:10 +0000428 }
Hao Liu7ec8ee32015-06-26 02:32:07 +0000429
Hao Liufd46bea2014-11-19 06:39:53 +0000430 if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
431 // Call SeparateConstOffsetFromGEP pass to extract constants within indices
432 // and lower a GEP with multiple indices to either arithmetic operations or
433 // multiple GEPs with single index.
David Blaikie8ad9a972018-03-28 22:28:50 +0000434 addPass(createSeparateConstOffsetFromGEPPass(true));
Hao Liufd46bea2014-11-19 06:39:53 +0000435 // Call EarlyCSE pass to find and remove subexpressions in the lowered
436 // result.
437 addPass(createEarlyCSEPass());
438 // Do loop invariant code motion in case part of the lowered result is
439 // invariant.
440 addPass(createLICMPass());
441 }
Tim Northoverb4ddc082014-05-30 10:09:59 +0000442}
443
Tim Northover3b0846e2014-05-24 12:50:23 +0000444// Pass Pipeline Configuration
445bool AArch64PassConfig::addPreISel() {
446 // Run promote constant before global merge, so that the promoted constants
447 // get a chance to be merged
448 if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
449 addPass(createAArch64PromoteConstantPass());
Eric Christophered47b222015-02-23 19:28:45 +0000450 // FIXME: On AArch64, this depends on the type.
451 // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
452 // and the offset has to be a multiple of the related size in bytes.
Ahmed Bougacha82076412015-06-04 20:39:23 +0000453 if ((TM->getOptLevel() != CodeGenOpt::None &&
Ahmed Bougachab96444e2015-04-11 00:06:36 +0000454 EnableGlobalMerge == cl::BOU_UNSET) ||
Ahmed Bougacha82076412015-06-04 20:39:23 +0000455 EnableGlobalMerge == cl::BOU_TRUE) {
456 bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
457 (EnableGlobalMerge == cl::BOU_UNSET);
458 addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize));
459 }
460
Tim Northover3b0846e2014-05-24 12:50:23 +0000461 return false;
462}
463
464bool AArch64PassConfig::addInstSelector() {
465 addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
466
467 // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many
468 // references to _TLS_MODULE_BASE_ as possible.
Daniel Sandersc81f4502015-06-16 15:44:21 +0000469 if (TM->getTargetTriple().isOSBinFormatELF() &&
Tim Northover3b0846e2014-05-24 12:50:23 +0000470 getOptLevel() != CodeGenOpt::None)
471 addPass(createAArch64CleanupLocalDynamicTLSPass());
472
473 return false;
474}
475
Quentin Colombetd96f4952016-02-11 19:35:06 +0000476bool AArch64PassConfig::addIRTranslator() {
477 addPass(new IRTranslator());
478 return false;
479}
Eugene Zelenko049b0172017-01-06 00:30:53 +0000480
Daniel Sandersc973ad12018-10-03 02:12:17 +0000481void AArch64PassConfig::addPreLegalizeMachineIR() {
482 addPass(createAArch64PreLegalizeCombiner());
483}
484
Tim Northover33b07d62016-07-22 20:03:43 +0000485bool AArch64PassConfig::addLegalizeMachineIR() {
Tim Northover69fa84a2016-10-14 22:18:18 +0000486 addPass(new Legalizer());
Tim Northover33b07d62016-07-22 20:03:43 +0000487 return false;
488}
Eugene Zelenko049b0172017-01-06 00:30:53 +0000489
Quentin Colombetd4131812016-04-07 20:27:33 +0000490bool AArch64PassConfig::addRegBankSelect() {
491 addPass(new RegBankSelect());
492 return false;
493}
Eugene Zelenko049b0172017-01-06 00:30:53 +0000494
Quentin Colombet7a43edd2017-05-27 01:34:07 +0000495void AArch64PassConfig::addPreGlobalInstructionSelect() {
496 // Workaround the deficiency of the fast register allocator.
497 if (TM->getOptLevel() == CodeGenOpt::None)
498 addPass(new Localizer());
499}
500
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +0000501bool AArch64PassConfig::addGlobalInstructionSelect() {
502 addPass(new InstructionSelect());
503 return false;
504}
Quentin Colombetd96f4952016-02-11 19:35:06 +0000505
Tim Northover3b0846e2014-05-24 12:50:23 +0000506bool AArch64PassConfig::addILPOpts() {
Jiangning Liu1a486da2014-09-05 02:55:24 +0000507 if (EnableCondOpt)
508 addPass(createAArch64ConditionOptimizerPass());
Tim Northover3b0846e2014-05-24 12:50:23 +0000509 if (EnableCCMP)
510 addPass(createAArch64ConditionalCompares());
Gerolf Hoflehner97c383b2014-08-07 21:40:58 +0000511 if (EnableMCR)
512 addPass(&MachineCombinerID);
Chad Rosier6db9ff62017-06-23 19:20:12 +0000513 if (EnableCondBrTuning)
514 addPass(createAArch64CondBrTuning());
James Molloy99917942014-08-06 13:31:32 +0000515 if (EnableEarlyIfConversion)
516 addPass(&EarlyIfConverterID);
Tim Northover3b0846e2014-05-24 12:50:23 +0000517 if (EnableStPairSuppress)
518 addPass(createAArch64StorePairSuppressPass());
Abderrazek Zaafrani2c80e4c2017-12-08 00:58:49 +0000519 addPass(createAArch64SIMDInstrOptPass());
Tim Northover3b0846e2014-05-24 12:50:23 +0000520 return true;
521}
522
Matthias Braun7e37a5f2014-12-11 21:26:47 +0000523void AArch64PassConfig::addPreRegAlloc() {
Matthias Braun3d51cf02016-11-16 03:38:27 +0000524 // Change dead register definitions to refer to the zero register.
525 if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
526 addPass(createAArch64DeadRegisterDefinitions());
527
Tim Northover3b0846e2014-05-24 12:50:23 +0000528 // Use AdvSIMD scalar instructions whenever profitable.
Quentin Colombet0c740d42014-08-21 18:10:07 +0000529 if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
Matthias Braunb2f23882014-12-11 23:18:03 +0000530 addPass(createAArch64AdvSIMDScalar());
Quentin Colombet0c740d42014-08-21 18:10:07 +0000531 // The AdvSIMD pass may produce copies that can be rewritten to
532 // be register coaleascer friendly.
533 addPass(&PeepholeOptimizerID);
534 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000535}
536
Matthias Braun7e37a5f2014-12-11 21:26:47 +0000537void AArch64PassConfig::addPostRegAlloc() {
Jun Bum Limb389d9b2016-02-16 20:02:39 +0000538 // Remove redundant copy instructions.
539 if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
540 addPass(createAArch64RedundantCopyEliminationPass());
541
Eric Christopher6f1e5682015-03-03 23:22:40 +0000542 if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
James Molloy3feea9c2014-08-08 12:33:21 +0000543 // Improve performance for some FP/SIMD code for A57.
544 addPass(createAArch64A57FPLoadBalancing());
Tim Northover3b0846e2014-05-24 12:50:23 +0000545}
546
Matthias Braun7e37a5f2014-12-11 21:26:47 +0000547void AArch64PassConfig::addPreSched2() {
Tim Northover3b0846e2014-05-24 12:50:23 +0000548 // Expand some pseudo instructions to allow proper scheduling.
Matthias Braunb2f23882014-12-11 23:18:03 +0000549 addPass(createAArch64ExpandPseudoPass());
Tim Northover3b0846e2014-05-24 12:50:23 +0000550 // Use load/store pair instructions when possible.
Geoff Berry9962fae2017-07-18 16:14:22 +0000551 if (TM->getOptLevel() != CodeGenOpt::None) {
552 if (EnableLoadStoreOpt)
553 addPass(createAArch64LoadStoreOptimizationPass());
Kristof Beylse66bc1f2018-12-18 08:50:02 +0000554 }
555
556 // The AArch64SpeculationHardeningPass destroys dominator tree and natural
557 // loop info, which is needed for the FalkorHWPFFixPass and also later on.
558 // Therefore, run the AArch64SpeculationHardeningPass before the
559 // FalkorHWPFFixPass to avoid recomputing dominator tree and natural loop
560 // info.
561 addPass(createAArch64SpeculationHardeningPass());
562
563 if (TM->getOptLevel() != CodeGenOpt::None) {
Geoff Berry9962fae2017-07-18 16:14:22 +0000564 if (EnableFalkorHWPFFix)
565 addPass(createFalkorHWPFFixPass());
566 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000567}
568
Matthias Braun7e37a5f2014-12-11 21:26:47 +0000569void AArch64PassConfig::addPreEmitPass() {
Alexandros Lamprineas490ae112018-12-17 10:45:43 +0000570 // Machine Block Placement might have created new opportunities when run
571 // at O3, where the Tail Duplication Threshold is set to 4 instructions.
572 // Run the load/store optimizer once more.
573 if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt)
574 addPass(createAArch64LoadStoreOptimizationPass());
575
Bradley Smithf2a801d2014-10-13 10:12:35 +0000576 if (EnableA53Fix835769)
Matthias Braunb2f23882014-12-11 23:18:03 +0000577 addPass(createAArch64A53Fix835769());
Tim Northover3b0846e2014-05-24 12:50:23 +0000578 // Relax conditional branch instructions if they're otherwise out of
579 // range of their destination.
Diana Picus850043b2016-08-01 05:56:57 +0000580 if (BranchRelaxation)
Matt Arsenault36919a42016-10-06 15:38:53 +0000581 addPass(&BranchRelaxationPassID);
582
Oliver Stannard250e5a52018-10-08 14:04:24 +0000583 if (EnableBranchTargets)
584 addPass(createAArch64BranchTargetsPass());
585
Tim Northover1c353412018-10-24 20:19:09 +0000586 if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
587 addPass(createAArch64CompressJumpTablesPass());
588
Tim Northover3b0846e2014-05-24 12:50:23 +0000589 if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
Daniel Sandersc81f4502015-06-16 15:44:21 +0000590 TM->getTargetTriple().isOSBinFormatMachO())
Tim Northover3b0846e2014-05-24 12:50:23 +0000591 addPass(createAArch64CollectLOHPass());
Tim Northover3b0846e2014-05-24 12:50:23 +0000592}