blob: c1fc183b04f6f87e983e7bbe3d2a94ddd5588b98 [file] [log] [blame]
//===- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file For Falkor, we want to avoid HW prefetcher instruction tag collisions
/// that may inhibit the HW prefetching. This is done in two steps. Before
/// ISel, we mark strided loads (i.e. those that will likely benefit from
/// prefetching) with metadata. Then, after opcodes have been finalized, we
/// insert MOVs and re-write loads to prevent unintentional tag collisions.
// ===---------------------------------------------------------------------===//
Geoff Berryb1e87142017-07-14 21:44:12 +000014
15#include "AArch64.h"
16#include "AArch64InstrInfo.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000017#include "AArch64Subtarget.h"
Geoff Berryb1e87142017-07-14 21:44:12 +000018#include "AArch64TargetMachine.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000019#include "llvm/ADT/DenseMap.h"
Geoff Berryb1e87142017-07-14 21:44:12 +000020#include "llvm/ADT/DepthFirstIterator.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000021#include "llvm/ADT/None.h"
22#include "llvm/ADT/Optional.h"
23#include "llvm/ADT/SmallVector.h"
Geoff Berryb1e87142017-07-14 21:44:12 +000024#include "llvm/ADT/Statistic.h"
25#include "llvm/Analysis/LoopInfo.h"
26#include "llvm/Analysis/ScalarEvolution.h"
27#include "llvm/Analysis/ScalarEvolutionExpressions.h"
Geoff Berry9962fae2017-07-18 16:14:22 +000028#include "llvm/CodeGen/LiveRegUnits.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000029#include "llvm/CodeGen/MachineBasicBlock.h"
30#include "llvm/CodeGen/MachineFunction.h"
31#include "llvm/CodeGen/MachineFunctionPass.h"
32#include "llvm/CodeGen/MachineInstr.h"
Geoff Berry9962fae2017-07-18 16:14:22 +000033#include "llvm/CodeGen/MachineInstrBuilder.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000034#include "llvm/CodeGen/MachineLoopInfo.h"
35#include "llvm/CodeGen/MachineOperand.h"
Geoff Berry9962fae2017-07-18 16:14:22 +000036#include "llvm/CodeGen/MachineRegisterInfo.h"
Geoff Berryb1e87142017-07-14 21:44:12 +000037#include "llvm/CodeGen/TargetPassConfig.h"
David Blaikieb3bde2e2017-11-17 01:07:10 +000038#include "llvm/CodeGen/TargetRegisterInfo.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000039#include "llvm/IR/DebugLoc.h"
Geoff Berryb1e87142017-07-14 21:44:12 +000040#include "llvm/IR/Dominators.h"
41#include "llvm/IR/Function.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000042#include "llvm/IR/Instruction.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/Metadata.h"
Reid Kleckner05da2fe2019-11-13 13:15:01 -080045#include "llvm/InitializePasses.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000046#include "llvm/Pass.h"
47#include "llvm/Support/Casting.h"
Geoff Berryb1e87142017-07-14 21:44:12 +000048#include "llvm/Support/Debug.h"
Geoff Berry5696e072018-04-10 21:43:03 +000049#include "llvm/Support/DebugCounter.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000050#include "llvm/Support/raw_ostream.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000051#include <cassert>
52#include <iterator>
53#include <utility>
Geoff Berryb1e87142017-07-14 21:44:12 +000054
55using namespace llvm;
56
57#define DEBUG_TYPE "falkor-hwpf-fix"
58
59STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked");
Geoff Berry9962fae2017-07-18 16:14:22 +000060STATISTIC(NumCollisionsAvoided,
61 "Number of HW prefetch tag collisions avoided");
62STATISTIC(NumCollisionsNotAvoided,
Chad Rosieraf7519e92018-04-10 14:57:13 +000063 "Number of HW prefetch tag collisions not avoided due to lack of registers");
Geoff Berry5696e072018-04-10 21:43:03 +000064DEBUG_COUNTER(FixCounter, "falkor-hwpf",
65 "Controls which tag collisions are avoided");
Geoff Berryb1e87142017-07-14 21:44:12 +000066
67namespace {
68
69class FalkorMarkStridedAccesses {
70public:
71 FalkorMarkStridedAccesses(LoopInfo &LI, ScalarEvolution &SE)
72 : LI(LI), SE(SE) {}
73
74 bool run();
75
76private:
Geoff Berry0cf9e702017-07-17 20:19:05 +000077 bool runOnLoop(Loop &L);
Geoff Berryb1e87142017-07-14 21:44:12 +000078
79 LoopInfo &LI;
80 ScalarEvolution &SE;
81};
82
83class FalkorMarkStridedAccessesLegacy : public FunctionPass {
84public:
85 static char ID; // Pass ID, replacement for typeid
Eugene Zelenko96d933d2017-07-25 23:51:02 +000086
Geoff Berryb1e87142017-07-14 21:44:12 +000087 FalkorMarkStridedAccessesLegacy() : FunctionPass(ID) {
88 initializeFalkorMarkStridedAccessesLegacyPass(
89 *PassRegistry::getPassRegistry());
90 }
91
92 void getAnalysisUsage(AnalysisUsage &AU) const override {
93 AU.addRequired<TargetPassConfig>();
94 AU.addPreserved<DominatorTreeWrapperPass>();
95 AU.addRequired<LoopInfoWrapperPass>();
96 AU.addPreserved<LoopInfoWrapperPass>();
97 AU.addRequired<ScalarEvolutionWrapperPass>();
Geoff Berry40549ad2017-08-16 19:03:16 +000098 AU.addPreserved<ScalarEvolutionWrapperPass>();
Geoff Berryb1e87142017-07-14 21:44:12 +000099 }
100
101 bool runOnFunction(Function &F) override;
102};
Eugene Zelenko96d933d2017-07-25 23:51:02 +0000103
104} // end anonymous namespace
Geoff Berryb1e87142017-07-14 21:44:12 +0000105
106char FalkorMarkStridedAccessesLegacy::ID = 0;
Eugene Zelenko96d933d2017-07-25 23:51:02 +0000107
Geoff Berryb1e87142017-07-14 21:44:12 +0000108INITIALIZE_PASS_BEGIN(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
109 "Falkor HW Prefetch Fix", false, false)
110INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
111INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
112INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
113INITIALIZE_PASS_END(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
114 "Falkor HW Prefetch Fix", false, false)
115
116FunctionPass *llvm::createFalkorMarkStridedAccessesPass() {
117 return new FalkorMarkStridedAccessesLegacy();
118}
119
120bool FalkorMarkStridedAccessesLegacy::runOnFunction(Function &F) {
121 TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
122 const AArch64Subtarget *ST =
123 TPC.getTM<AArch64TargetMachine>().getSubtargetImpl(F);
124 if (ST->getProcFamily() != AArch64Subtarget::Falkor)
125 return false;
126
127 if (skipFunction(F))
128 return false;
129
130 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
131 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
132
133 FalkorMarkStridedAccesses LDP(LI, SE);
134 return LDP.run();
135}
136
137bool FalkorMarkStridedAccesses::run() {
138 bool MadeChange = false;
139
Geoff Berry0cf9e702017-07-17 20:19:05 +0000140 for (Loop *L : LI)
141 for (auto LIt = df_begin(L), LE = df_end(L); LIt != LE; ++LIt)
142 MadeChange |= runOnLoop(**LIt);
Geoff Berryb1e87142017-07-14 21:44:12 +0000143
144 return MadeChange;
145}
146
Geoff Berry0cf9e702017-07-17 20:19:05 +0000147bool FalkorMarkStridedAccesses::runOnLoop(Loop &L) {
Geoff Berryb1e87142017-07-14 21:44:12 +0000148 // Only mark strided loads in the inner-most loop
Geoff Berry0cf9e702017-07-17 20:19:05 +0000149 if (!L.empty())
Geoff Berryb1e87142017-07-14 21:44:12 +0000150 return false;
151
152 bool MadeChange = false;
153
Geoff Berry0cf9e702017-07-17 20:19:05 +0000154 for (BasicBlock *BB : L.blocks()) {
155 for (Instruction &I : *BB) {
Geoff Berryb1e87142017-07-14 21:44:12 +0000156 LoadInst *LoadI = dyn_cast<LoadInst>(&I);
157 if (!LoadI)
158 continue;
159
160 Value *PtrValue = LoadI->getPointerOperand();
Geoff Berry0cf9e702017-07-17 20:19:05 +0000161 if (L.isLoopInvariant(PtrValue))
Geoff Berryb1e87142017-07-14 21:44:12 +0000162 continue;
163
164 const SCEV *LSCEV = SE.getSCEV(PtrValue);
165 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
166 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
167 continue;
168
169 LoadI->setMetadata(FALKOR_STRIDED_ACCESS_MD,
170 MDNode::get(LoadI->getContext(), {}));
171 ++NumStridedLoadsMarked;
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000172 LLVM_DEBUG(dbgs() << "Load: " << I << " marked as strided\n");
Geoff Berryb1e87142017-07-14 21:44:12 +0000173 MadeChange = true;
174 }
175 }
176
177 return MadeChange;
178}
Geoff Berry9962fae2017-07-18 16:14:22 +0000179
180namespace {
181
182class FalkorHWPFFix : public MachineFunctionPass {
183public:
184 static char ID;
185
186 FalkorHWPFFix() : MachineFunctionPass(ID) {
187 initializeFalkorHWPFFixPass(*PassRegistry::getPassRegistry());
188 }
189
190 bool runOnMachineFunction(MachineFunction &Fn) override;
191
Eugene Zelenko96d933d2017-07-25 23:51:02 +0000192 void getAnalysisUsage(AnalysisUsage &AU) const override {
Michael Zolotukhinfab7a672018-03-22 23:44:40 +0000193 AU.setPreservesCFG();
Geoff Berry9962fae2017-07-18 16:14:22 +0000194 AU.addRequired<MachineLoopInfo>();
195 MachineFunctionPass::getAnalysisUsage(AU);
196 }
197
198 MachineFunctionProperties getRequiredProperties() const override {
199 return MachineFunctionProperties().set(
200 MachineFunctionProperties::Property::NoVRegs);
201 }
202
203private:
204 void runOnLoop(MachineLoop &L, MachineFunction &Fn);
205
206 const AArch64InstrInfo *TII;
207 const TargetRegisterInfo *TRI;
208 DenseMap<unsigned, SmallVector<MachineInstr *, 4>> TagMap;
209 bool Modified;
210};
211
212/// Bits from load opcodes used to compute HW prefetcher instruction tags.
213struct LoadInfo {
Eugene Zelenko96d933d2017-07-25 23:51:02 +0000214 LoadInfo() = default;
215
Matt Arsenaulte3a676e2019-06-24 15:50:29 +0000216 Register DestReg;
217 Register BaseReg;
Eugene Zelenko96d933d2017-07-25 23:51:02 +0000218 int BaseRegIdx = -1;
219 const MachineOperand *OffsetOpnd = nullptr;
220 bool IsPrePost = false;
Geoff Berry9962fae2017-07-18 16:14:22 +0000221};
222
Eugene Zelenko96d933d2017-07-25 23:51:02 +0000223} // end anonymous namespace
Geoff Berry9962fae2017-07-18 16:14:22 +0000224
225char FalkorHWPFFix::ID = 0;
226
227INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "falkor-hwpf-fix-late",
228 "Falkor HW Prefetch Fix Late Phase", false, false)
229INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
230INITIALIZE_PASS_END(FalkorHWPFFix, "falkor-hwpf-fix-late",
231 "Falkor HW Prefetch Fix Late Phase", false, false)
232
/// Pack the three tag components into one integer:
///   bits [3:0]  - low 4 bits of \p Dest
///   bits [7:4]  - low 4 bits of \p Base
///   bits [13:8] - low 6 bits of \p Offset
/// Higher bits of each input are discarded.
static unsigned makeTag(unsigned Dest, unsigned Base, unsigned Offset) {
  return (Dest & 0xf) | ((Base & 0xf) << 4) | ((Offset & 0x3f) << 8);
}
236
237static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
238 int DestRegIdx;
239 int BaseRegIdx;
240 int OffsetIdx;
241 bool IsPrePost;
242
243 switch (MI.getOpcode()) {
244 default:
245 return None;
246
Geoff Berry9962fae2017-07-18 16:14:22 +0000247 case AArch64::LD1i64:
Geoff Berry9962fae2017-07-18 16:14:22 +0000248 case AArch64::LD2i64:
Geoff Berry9962fae2017-07-18 16:14:22 +0000249 DestRegIdx = 0;
250 BaseRegIdx = 3;
251 OffsetIdx = -1;
252 IsPrePost = false;
253 break;
254
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000255 case AArch64::LD1i8:
256 case AArch64::LD1i16:
257 case AArch64::LD1i32:
258 case AArch64::LD2i8:
259 case AArch64::LD2i16:
260 case AArch64::LD2i32:
261 case AArch64::LD3i8:
262 case AArch64::LD3i16:
263 case AArch64::LD3i32:
Geoff Berry9962fae2017-07-18 16:14:22 +0000264 case AArch64::LD3i64:
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000265 case AArch64::LD4i8:
266 case AArch64::LD4i16:
267 case AArch64::LD4i32:
Geoff Berry9962fae2017-07-18 16:14:22 +0000268 case AArch64::LD4i64:
269 DestRegIdx = -1;
270 BaseRegIdx = 3;
271 OffsetIdx = -1;
272 IsPrePost = false;
273 break;
274
275 case AArch64::LD1Onev1d:
276 case AArch64::LD1Onev2s:
277 case AArch64::LD1Onev4h:
278 case AArch64::LD1Onev8b:
279 case AArch64::LD1Onev2d:
280 case AArch64::LD1Onev4s:
281 case AArch64::LD1Onev8h:
282 case AArch64::LD1Onev16b:
283 case AArch64::LD1Rv1d:
284 case AArch64::LD1Rv2s:
285 case AArch64::LD1Rv4h:
286 case AArch64::LD1Rv8b:
287 case AArch64::LD1Rv2d:
288 case AArch64::LD1Rv4s:
289 case AArch64::LD1Rv8h:
290 case AArch64::LD1Rv16b:
Geoff Berry9962fae2017-07-18 16:14:22 +0000291 DestRegIdx = 0;
292 BaseRegIdx = 1;
293 OffsetIdx = -1;
294 IsPrePost = false;
295 break;
296
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000297 case AArch64::LD1Twov1d:
298 case AArch64::LD1Twov2s:
299 case AArch64::LD1Twov4h:
300 case AArch64::LD1Twov8b:
Geoff Berry9962fae2017-07-18 16:14:22 +0000301 case AArch64::LD1Twov2d:
302 case AArch64::LD1Twov4s:
303 case AArch64::LD1Twov8h:
304 case AArch64::LD1Twov16b:
305 case AArch64::LD1Threev1d:
306 case AArch64::LD1Threev2s:
307 case AArch64::LD1Threev4h:
308 case AArch64::LD1Threev8b:
309 case AArch64::LD1Threev2d:
310 case AArch64::LD1Threev4s:
311 case AArch64::LD1Threev8h:
312 case AArch64::LD1Threev16b:
313 case AArch64::LD1Fourv1d:
314 case AArch64::LD1Fourv2s:
315 case AArch64::LD1Fourv4h:
316 case AArch64::LD1Fourv8b:
317 case AArch64::LD1Fourv2d:
318 case AArch64::LD1Fourv4s:
319 case AArch64::LD1Fourv8h:
320 case AArch64::LD1Fourv16b:
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000321 case AArch64::LD2Twov2s:
322 case AArch64::LD2Twov4s:
323 case AArch64::LD2Twov8b:
Geoff Berry9962fae2017-07-18 16:14:22 +0000324 case AArch64::LD2Twov2d:
325 case AArch64::LD2Twov4h:
326 case AArch64::LD2Twov8h:
327 case AArch64::LD2Twov16b:
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000328 case AArch64::LD2Rv1d:
329 case AArch64::LD2Rv2s:
330 case AArch64::LD2Rv4s:
331 case AArch64::LD2Rv8b:
Geoff Berry9962fae2017-07-18 16:14:22 +0000332 case AArch64::LD2Rv2d:
333 case AArch64::LD2Rv4h:
334 case AArch64::LD2Rv8h:
335 case AArch64::LD2Rv16b:
336 case AArch64::LD3Threev2s:
337 case AArch64::LD3Threev4h:
338 case AArch64::LD3Threev8b:
339 case AArch64::LD3Threev2d:
340 case AArch64::LD3Threev4s:
341 case AArch64::LD3Threev8h:
342 case AArch64::LD3Threev16b:
343 case AArch64::LD3Rv1d:
344 case AArch64::LD3Rv2s:
345 case AArch64::LD3Rv4h:
346 case AArch64::LD3Rv8b:
347 case AArch64::LD3Rv2d:
348 case AArch64::LD3Rv4s:
349 case AArch64::LD3Rv8h:
350 case AArch64::LD3Rv16b:
351 case AArch64::LD4Fourv2s:
352 case AArch64::LD4Fourv4h:
353 case AArch64::LD4Fourv8b:
354 case AArch64::LD4Fourv2d:
355 case AArch64::LD4Fourv4s:
356 case AArch64::LD4Fourv8h:
357 case AArch64::LD4Fourv16b:
358 case AArch64::LD4Rv1d:
359 case AArch64::LD4Rv2s:
360 case AArch64::LD4Rv4h:
361 case AArch64::LD4Rv8b:
362 case AArch64::LD4Rv2d:
363 case AArch64::LD4Rv4s:
364 case AArch64::LD4Rv8h:
365 case AArch64::LD4Rv16b:
366 DestRegIdx = -1;
367 BaseRegIdx = 1;
368 OffsetIdx = -1;
369 IsPrePost = false;
370 break;
371
Geoff Berry9962fae2017-07-18 16:14:22 +0000372 case AArch64::LD1i64_POST:
Geoff Berry9962fae2017-07-18 16:14:22 +0000373 case AArch64::LD2i64_POST:
Geoff Berry9962fae2017-07-18 16:14:22 +0000374 DestRegIdx = 1;
375 BaseRegIdx = 4;
376 OffsetIdx = 5;
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000377 IsPrePost = true;
Geoff Berry9962fae2017-07-18 16:14:22 +0000378 break;
379
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000380 case AArch64::LD1i8_POST:
381 case AArch64::LD1i16_POST:
382 case AArch64::LD1i32_POST:
383 case AArch64::LD2i8_POST:
384 case AArch64::LD2i16_POST:
385 case AArch64::LD2i32_POST:
386 case AArch64::LD3i8_POST:
387 case AArch64::LD3i16_POST:
388 case AArch64::LD3i32_POST:
Geoff Berry9962fae2017-07-18 16:14:22 +0000389 case AArch64::LD3i64_POST:
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000390 case AArch64::LD4i8_POST:
391 case AArch64::LD4i16_POST:
392 case AArch64::LD4i32_POST:
Geoff Berry9962fae2017-07-18 16:14:22 +0000393 case AArch64::LD4i64_POST:
394 DestRegIdx = -1;
395 BaseRegIdx = 4;
396 OffsetIdx = 5;
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000397 IsPrePost = true;
Geoff Berry9962fae2017-07-18 16:14:22 +0000398 break;
399
400 case AArch64::LD1Onev1d_POST:
401 case AArch64::LD1Onev2s_POST:
402 case AArch64::LD1Onev4h_POST:
403 case AArch64::LD1Onev8b_POST:
404 case AArch64::LD1Onev2d_POST:
405 case AArch64::LD1Onev4s_POST:
406 case AArch64::LD1Onev8h_POST:
407 case AArch64::LD1Onev16b_POST:
408 case AArch64::LD1Rv1d_POST:
409 case AArch64::LD1Rv2s_POST:
410 case AArch64::LD1Rv4h_POST:
411 case AArch64::LD1Rv8b_POST:
412 case AArch64::LD1Rv2d_POST:
413 case AArch64::LD1Rv4s_POST:
414 case AArch64::LD1Rv8h_POST:
415 case AArch64::LD1Rv16b_POST:
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000416 DestRegIdx = 1;
417 BaseRegIdx = 2;
418 OffsetIdx = 3;
419 IsPrePost = true;
420 break;
421
Geoff Berry9962fae2017-07-18 16:14:22 +0000422 case AArch64::LD1Twov1d_POST:
423 case AArch64::LD1Twov2s_POST:
424 case AArch64::LD1Twov4h_POST:
425 case AArch64::LD1Twov8b_POST:
Geoff Berry9962fae2017-07-18 16:14:22 +0000426 case AArch64::LD1Twov2d_POST:
427 case AArch64::LD1Twov4s_POST:
428 case AArch64::LD1Twov8h_POST:
429 case AArch64::LD1Twov16b_POST:
430 case AArch64::LD1Threev1d_POST:
431 case AArch64::LD1Threev2s_POST:
432 case AArch64::LD1Threev4h_POST:
433 case AArch64::LD1Threev8b_POST:
434 case AArch64::LD1Threev2d_POST:
435 case AArch64::LD1Threev4s_POST:
436 case AArch64::LD1Threev8h_POST:
437 case AArch64::LD1Threev16b_POST:
438 case AArch64::LD1Fourv1d_POST:
439 case AArch64::LD1Fourv2s_POST:
440 case AArch64::LD1Fourv4h_POST:
441 case AArch64::LD1Fourv8b_POST:
442 case AArch64::LD1Fourv2d_POST:
443 case AArch64::LD1Fourv4s_POST:
444 case AArch64::LD1Fourv8h_POST:
445 case AArch64::LD1Fourv16b_POST:
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000446 case AArch64::LD2Twov2s_POST:
447 case AArch64::LD2Twov4s_POST:
448 case AArch64::LD2Twov8b_POST:
Geoff Berry9962fae2017-07-18 16:14:22 +0000449 case AArch64::LD2Twov2d_POST:
450 case AArch64::LD2Twov4h_POST:
451 case AArch64::LD2Twov8h_POST:
452 case AArch64::LD2Twov16b_POST:
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000453 case AArch64::LD2Rv1d_POST:
454 case AArch64::LD2Rv2s_POST:
455 case AArch64::LD2Rv4s_POST:
456 case AArch64::LD2Rv8b_POST:
Geoff Berry9962fae2017-07-18 16:14:22 +0000457 case AArch64::LD2Rv2d_POST:
458 case AArch64::LD2Rv4h_POST:
459 case AArch64::LD2Rv8h_POST:
460 case AArch64::LD2Rv16b_POST:
461 case AArch64::LD3Threev2s_POST:
462 case AArch64::LD3Threev4h_POST:
463 case AArch64::LD3Threev8b_POST:
464 case AArch64::LD3Threev2d_POST:
465 case AArch64::LD3Threev4s_POST:
466 case AArch64::LD3Threev8h_POST:
467 case AArch64::LD3Threev16b_POST:
468 case AArch64::LD3Rv1d_POST:
469 case AArch64::LD3Rv2s_POST:
470 case AArch64::LD3Rv4h_POST:
471 case AArch64::LD3Rv8b_POST:
472 case AArch64::LD3Rv2d_POST:
473 case AArch64::LD3Rv4s_POST:
474 case AArch64::LD3Rv8h_POST:
475 case AArch64::LD3Rv16b_POST:
476 case AArch64::LD4Fourv2s_POST:
477 case AArch64::LD4Fourv4h_POST:
478 case AArch64::LD4Fourv8b_POST:
479 case AArch64::LD4Fourv2d_POST:
480 case AArch64::LD4Fourv4s_POST:
481 case AArch64::LD4Fourv8h_POST:
482 case AArch64::LD4Fourv16b_POST:
483 case AArch64::LD4Rv1d_POST:
484 case AArch64::LD4Rv2s_POST:
485 case AArch64::LD4Rv4h_POST:
486 case AArch64::LD4Rv8b_POST:
487 case AArch64::LD4Rv2d_POST:
488 case AArch64::LD4Rv4s_POST:
489 case AArch64::LD4Rv8h_POST:
490 case AArch64::LD4Rv16b_POST:
491 DestRegIdx = -1;
492 BaseRegIdx = 2;
493 OffsetIdx = 3;
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000494 IsPrePost = true;
Geoff Berry9962fae2017-07-18 16:14:22 +0000495 break;
496
497 case AArch64::LDRBBroW:
498 case AArch64::LDRBBroX:
499 case AArch64::LDRBBui:
500 case AArch64::LDRBroW:
501 case AArch64::LDRBroX:
502 case AArch64::LDRBui:
503 case AArch64::LDRDl:
504 case AArch64::LDRDroW:
505 case AArch64::LDRDroX:
506 case AArch64::LDRDui:
507 case AArch64::LDRHHroW:
508 case AArch64::LDRHHroX:
509 case AArch64::LDRHHui:
510 case AArch64::LDRHroW:
511 case AArch64::LDRHroX:
512 case AArch64::LDRHui:
513 case AArch64::LDRQl:
514 case AArch64::LDRQroW:
515 case AArch64::LDRQroX:
516 case AArch64::LDRQui:
517 case AArch64::LDRSBWroW:
518 case AArch64::LDRSBWroX:
519 case AArch64::LDRSBWui:
520 case AArch64::LDRSBXroW:
521 case AArch64::LDRSBXroX:
522 case AArch64::LDRSBXui:
523 case AArch64::LDRSHWroW:
524 case AArch64::LDRSHWroX:
525 case AArch64::LDRSHWui:
526 case AArch64::LDRSHXroW:
527 case AArch64::LDRSHXroX:
528 case AArch64::LDRSHXui:
529 case AArch64::LDRSWl:
530 case AArch64::LDRSWroW:
531 case AArch64::LDRSWroX:
532 case AArch64::LDRSWui:
533 case AArch64::LDRSl:
534 case AArch64::LDRSroW:
535 case AArch64::LDRSroX:
536 case AArch64::LDRSui:
537 case AArch64::LDRWl:
538 case AArch64::LDRWroW:
539 case AArch64::LDRWroX:
540 case AArch64::LDRWui:
541 case AArch64::LDRXl:
542 case AArch64::LDRXroW:
543 case AArch64::LDRXroX:
544 case AArch64::LDRXui:
545 case AArch64::LDURBBi:
546 case AArch64::LDURBi:
547 case AArch64::LDURDi:
548 case AArch64::LDURHHi:
549 case AArch64::LDURHi:
550 case AArch64::LDURQi:
551 case AArch64::LDURSBWi:
552 case AArch64::LDURSBXi:
553 case AArch64::LDURSHWi:
554 case AArch64::LDURSHXi:
555 case AArch64::LDURSWi:
556 case AArch64::LDURSi:
557 case AArch64::LDURWi:
558 case AArch64::LDURXi:
559 DestRegIdx = 0;
560 BaseRegIdx = 1;
561 OffsetIdx = 2;
562 IsPrePost = false;
563 break;
564
565 case AArch64::LDRBBpost:
566 case AArch64::LDRBBpre:
567 case AArch64::LDRBpost:
568 case AArch64::LDRBpre:
569 case AArch64::LDRDpost:
570 case AArch64::LDRDpre:
571 case AArch64::LDRHHpost:
572 case AArch64::LDRHHpre:
573 case AArch64::LDRHpost:
574 case AArch64::LDRHpre:
575 case AArch64::LDRQpost:
576 case AArch64::LDRQpre:
577 case AArch64::LDRSBWpost:
578 case AArch64::LDRSBWpre:
579 case AArch64::LDRSBXpost:
580 case AArch64::LDRSBXpre:
581 case AArch64::LDRSHWpost:
582 case AArch64::LDRSHWpre:
583 case AArch64::LDRSHXpost:
584 case AArch64::LDRSHXpre:
585 case AArch64::LDRSWpost:
586 case AArch64::LDRSWpre:
587 case AArch64::LDRSpost:
588 case AArch64::LDRSpre:
589 case AArch64::LDRWpost:
590 case AArch64::LDRWpre:
591 case AArch64::LDRXpost:
592 case AArch64::LDRXpre:
593 DestRegIdx = 1;
594 BaseRegIdx = 2;
595 OffsetIdx = 3;
596 IsPrePost = true;
597 break;
598
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000599 case AArch64::LDNPDi:
600 case AArch64::LDNPQi:
601 case AArch64::LDNPSi:
Geoff Berry9962fae2017-07-18 16:14:22 +0000602 case AArch64::LDPQi:
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000603 case AArch64::LDPDi:
604 case AArch64::LDPSi:
Geoff Berry9962fae2017-07-18 16:14:22 +0000605 DestRegIdx = -1;
606 BaseRegIdx = 2;
607 OffsetIdx = 3;
608 IsPrePost = false;
609 break;
610
611 case AArch64::LDPSWi:
Geoff Berry9962fae2017-07-18 16:14:22 +0000612 case AArch64::LDPWi:
613 case AArch64::LDPXi:
614 DestRegIdx = 0;
615 BaseRegIdx = 2;
616 OffsetIdx = 3;
617 IsPrePost = false;
618 break;
619
620 case AArch64::LDPQpost:
621 case AArch64::LDPQpre:
Geoff Berrya4b2f5d2017-09-26 21:40:41 +0000622 case AArch64::LDPDpost:
623 case AArch64::LDPDpre:
624 case AArch64::LDPSpost:
625 case AArch64::LDPSpre:
Geoff Berry9962fae2017-07-18 16:14:22 +0000626 DestRegIdx = -1;
627 BaseRegIdx = 3;
628 OffsetIdx = 4;
629 IsPrePost = true;
630 break;
631
Geoff Berry9962fae2017-07-18 16:14:22 +0000632 case AArch64::LDPSWpost:
633 case AArch64::LDPSWpre:
Geoff Berry9962fae2017-07-18 16:14:22 +0000634 case AArch64::LDPWpost:
635 case AArch64::LDPWpre:
636 case AArch64::LDPXpost:
637 case AArch64::LDPXpre:
638 DestRegIdx = 1;
639 BaseRegIdx = 3;
640 OffsetIdx = 4;
641 IsPrePost = true;
642 break;
643 }
644
Geoff Berryc032b2b2017-09-27 17:14:10 +0000645 // Loads from the stack pointer don't get prefetched.
Daniel Sanders5ae66e52019-08-12 22:40:53 +0000646 Register BaseReg = MI.getOperand(BaseRegIdx).getReg();
Geoff Berryc032b2b2017-09-27 17:14:10 +0000647 if (BaseReg == AArch64::SP || BaseReg == AArch64::WSP)
648 return None;
649
Geoff Berry9962fae2017-07-18 16:14:22 +0000650 LoadInfo LI;
Matt Arsenaulte3a676e2019-06-24 15:50:29 +0000651 LI.DestReg = DestRegIdx == -1 ? Register() : MI.getOperand(DestRegIdx).getReg();
Geoff Berryc032b2b2017-09-27 17:14:10 +0000652 LI.BaseReg = BaseReg;
Geoff Berry9962fae2017-07-18 16:14:22 +0000653 LI.BaseRegIdx = BaseRegIdx;
654 LI.OffsetOpnd = OffsetIdx == -1 ? nullptr : &MI.getOperand(OffsetIdx);
655 LI.IsPrePost = IsPrePost;
656 return LI;
657}
658
659static Optional<unsigned> getTag(const TargetRegisterInfo *TRI,
660 const MachineInstr &MI, const LoadInfo &LI) {
661 unsigned Dest = LI.DestReg ? TRI->getEncodingValue(LI.DestReg) : 0;
662 unsigned Base = TRI->getEncodingValue(LI.BaseReg);
663 unsigned Off;
664 if (LI.OffsetOpnd == nullptr)
665 Off = 0;
666 else if (LI.OffsetOpnd->isGlobal() || LI.OffsetOpnd->isSymbol() ||
667 LI.OffsetOpnd->isCPI())
668 return None;
669 else if (LI.OffsetOpnd->isReg())
670 Off = (1 << 5) | TRI->getEncodingValue(LI.OffsetOpnd->getReg());
671 else
672 Off = LI.OffsetOpnd->getImm() >> 2;
673
674 return makeTag(Dest, Base, Off);
675}
676
677void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) {
678 // Build the initial tag map for the whole loop.
679 TagMap.clear();
680 for (MachineBasicBlock *MBB : L.getBlocks())
681 for (MachineInstr &MI : *MBB) {
682 Optional<LoadInfo> LInfo = getLoadInfo(MI);
683 if (!LInfo)
684 continue;
685 Optional<unsigned> Tag = getTag(TRI, MI, *LInfo);
686 if (!Tag)
687 continue;
688 TagMap[*Tag].push_back(&MI);
689 }
690
691 bool AnyCollisions = false;
692 for (auto &P : TagMap) {
693 auto Size = P.second.size();
694 if (Size > 1) {
695 for (auto *MI : P.second) {
696 if (TII->isStridedAccess(*MI)) {
697 AnyCollisions = true;
698 break;
699 }
700 }
701 }
702 if (AnyCollisions)
703 break;
704 }
705 // Nothing to fix.
706 if (!AnyCollisions)
707 return;
708
709 MachineRegisterInfo &MRI = Fn.getRegInfo();
710
711 // Go through all the basic blocks in the current loop and fix any streaming
712 // loads to avoid collisions with any other loads.
713 LiveRegUnits LR(*TRI);
714 for (MachineBasicBlock *MBB : L.getBlocks()) {
715 LR.clear();
716 LR.addLiveOuts(*MBB);
717 for (auto I = MBB->rbegin(); I != MBB->rend(); LR.stepBackward(*I), ++I) {
718 MachineInstr &MI = *I;
719 if (!TII->isStridedAccess(MI))
720 continue;
721
Geoff Berrybbfa2462017-09-26 21:40:46 +0000722 Optional<LoadInfo> OptLdI = getLoadInfo(MI);
723 if (!OptLdI)
724 continue;
725 LoadInfo LdI = *OptLdI;
726 Optional<unsigned> OptOldTag = getTag(TRI, MI, LdI);
727 if (!OptOldTag)
728 continue;
729 auto &OldCollisions = TagMap[*OptOldTag];
Geoff Berry9962fae2017-07-18 16:14:22 +0000730 if (OldCollisions.size() <= 1)
731 continue;
732
733 bool Fixed = false;
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000734 LLVM_DEBUG(dbgs() << "Attempting to fix tag collision: " << MI);
Geoff Berry9962fae2017-07-18 16:14:22 +0000735
Geoff Berry5696e072018-04-10 21:43:03 +0000736 if (!DebugCounter::shouldExecute(FixCounter)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000737 LLVM_DEBUG(dbgs() << "Skipping fix due to debug counter:\n " << MI);
Geoff Berry5696e072018-04-10 21:43:03 +0000738 continue;
739 }
740
741 // Add the non-base registers of MI as live so we don't use them as
742 // scratch registers.
743 for (unsigned OpI = 0, OpE = MI.getNumOperands(); OpI < OpE; ++OpI) {
744 if (OpI == static_cast<unsigned>(LdI.BaseRegIdx))
745 continue;
746 MachineOperand &MO = MI.getOperand(OpI);
747 if (MO.isReg() && MO.readsReg())
748 LR.addReg(MO.getReg());
749 }
750
Geoff Berry9962fae2017-07-18 16:14:22 +0000751 for (unsigned ScratchReg : AArch64::GPR64RegClass) {
752 if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg))
753 continue;
754
755 LoadInfo NewLdI(LdI);
756 NewLdI.BaseReg = ScratchReg;
757 unsigned NewTag = *getTag(TRI, MI, NewLdI);
758 // Scratch reg tag would collide too, so don't use it.
759 if (TagMap.count(NewTag))
760 continue;
761
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000762 LLVM_DEBUG(dbgs() << "Changing base reg to: "
763 << printReg(ScratchReg, TRI) << '\n');
Geoff Berry9962fae2017-07-18 16:14:22 +0000764
765 // Rewrite:
766 // Xd = LOAD Xb, off
767 // to:
768 // Xc = MOV Xb
769 // Xd = LOAD Xc, off
770 DebugLoc DL = MI.getDebugLoc();
771 BuildMI(*MBB, &MI, DL, TII->get(AArch64::ORRXrs), ScratchReg)
772 .addReg(AArch64::XZR)
773 .addReg(LdI.BaseReg)
774 .addImm(0);
775 MachineOperand &BaseOpnd = MI.getOperand(LdI.BaseRegIdx);
776 BaseOpnd.setReg(ScratchReg);
777
778 // If the load does a pre/post increment, then insert a MOV after as
779 // well to update the real base register.
780 if (LdI.IsPrePost) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000781 LLVM_DEBUG(dbgs() << "Doing post MOV of incremented reg: "
782 << printReg(ScratchReg, TRI) << '\n');
Geoff Berry9962fae2017-07-18 16:14:22 +0000783 MI.getOperand(0).setReg(
784 ScratchReg); // Change tied operand pre/post update dest.
785 BuildMI(*MBB, std::next(MachineBasicBlock::iterator(MI)), DL,
786 TII->get(AArch64::ORRXrs), LdI.BaseReg)
787 .addReg(AArch64::XZR)
788 .addReg(ScratchReg)
789 .addImm(0);
790 }
791
792 for (int I = 0, E = OldCollisions.size(); I != E; ++I)
793 if (OldCollisions[I] == &MI) {
794 std::swap(OldCollisions[I], OldCollisions[E - 1]);
795 OldCollisions.pop_back();
796 break;
797 }
798
799 // Update TagMap to reflect instruction changes to reduce the number
800 // of later MOVs to be inserted. This needs to be done after
801 // OldCollisions is updated since it may be relocated by this
802 // insertion.
803 TagMap[NewTag].push_back(&MI);
804 ++NumCollisionsAvoided;
805 Fixed = true;
806 Modified = true;
807 break;
808 }
809 if (!Fixed)
810 ++NumCollisionsNotAvoided;
811 }
812 }
813}
814
815bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) {
816 auto &ST = static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
817 if (ST.getProcFamily() != AArch64Subtarget::Falkor)
818 return false;
819
Matthias Braunf1caa282017-12-15 22:22:58 +0000820 if (skipFunction(Fn.getFunction()))
Geoff Berry9962fae2017-07-18 16:14:22 +0000821 return false;
822
823 TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
824 TRI = ST.getRegisterInfo();
825
826 assert(TRI->trackLivenessAfterRegAlloc(Fn) &&
827 "Register liveness not available!");
828
829 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
830
831 Modified = false;
832
833 for (MachineLoop *I : LI)
834 for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
835 // Only process inner-loops
836 if (L->empty())
837 runOnLoop(**L, Fn);
838
839 return Modified;
840}
841
842FunctionPass *llvm::createFalkorHWPFFixPass() { return new FalkorHWPFFix(); }