blob: a790ab0f852327cb43b7fa80a23778480916a104 [file] [log] [blame]
Sam Parkera6fd9192019-06-25 10:45:51 +00001//===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10/// instructions into machine operations.
11/// The expectation is that the loop contains three pseudo instructions:
12/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13/// form should be in the preheader, whereas the while form should be in the
14/// preheaders only predecessor. TODO: Could DoLoopStart get moved into the
15/// pre-preheader?
16/// - t2LoopDec - placed within in the loop body.
17/// - t2LoopEnd - the loop latch terminator.
18///
19//===----------------------------------------------------------------------===//
20
21#include "ARM.h"
22#include "ARMBaseInstrInfo.h"
23#include "ARMBaseRegisterInfo.h"
24#include "ARMBasicBlockInfo.h"
25#include "ARMSubtarget.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineLoopInfo.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29
30using namespace llvm;
31
32#define DEBUG_TYPE "arm-low-overhead-loops"
33#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
34
35namespace {
36
37 class ARMLowOverheadLoops : public MachineFunctionPass {
38 const ARMBaseInstrInfo *TII = nullptr;
39 MachineRegisterInfo *MRI = nullptr;
40 std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
41
42 public:
43 static char ID;
44
45 ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
46
47 void getAnalysisUsage(AnalysisUsage &AU) const override {
48 AU.setPreservesCFG();
49 AU.addRequired<MachineLoopInfo>();
50 MachineFunctionPass::getAnalysisUsage(AU);
51 }
52
53 bool runOnMachineFunction(MachineFunction &MF) override;
54
55 bool ProcessLoop(MachineLoop *ML);
56
Sam Parker775b2f52019-07-10 12:29:43 +000057 void RevertWhile(MachineInstr *MI) const;
58
59 void RevertLoopDec(MachineInstr *MI) const;
60
61 void RevertLoopEnd(MachineInstr *MI) const;
62
Sam Parkera6fd9192019-06-25 10:45:51 +000063 void Expand(MachineLoop *ML, MachineInstr *Start,
64 MachineInstr *Dec, MachineInstr *End, bool Revert);
65
66 MachineFunctionProperties getRequiredProperties() const override {
67 return MachineFunctionProperties().set(
68 MachineFunctionProperties::Property::NoVRegs);
69 }
70
71 StringRef getPassName() const override {
72 return ARM_LOW_OVERHEAD_LOOPS_NAME;
73 }
74 };
75}
76
77char ARMLowOverheadLoops::ID = 0;
78
79INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
80 false, false)
81
82bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
Sam Parkerbcf0eb72019-06-25 15:11:17 +000083 if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
84 return false;
Sam Parkera6fd9192019-06-25 10:45:51 +000085
86 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
87
88 auto &MLI = getAnalysis<MachineLoopInfo>();
89 MRI = &MF.getRegInfo();
90 TII = static_cast<const ARMBaseInstrInfo*>(
91 MF.getSubtarget().getInstrInfo());
92 BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
93 BBUtils->computeAllBlockSizes();
94
95 bool Changed = false;
96 for (auto ML : MLI) {
97 if (!ML->getParentLoop())
98 Changed |= ProcessLoop(ML);
99 }
100 return Changed;
101}
102
103bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
104
105 bool Changed = false;
106
107 // Process inner loops first.
108 for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
109 Changed |= ProcessLoop(*I);
110
111 LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
112
113 auto IsLoopStart = [](MachineInstr &MI) {
Sam Parker98722692019-07-01 08:21:28 +0000114 return MI.getOpcode() == ARM::t2DoLoopStart ||
115 MI.getOpcode() == ARM::t2WhileLoopStart;
Sam Parkera6fd9192019-06-25 10:45:51 +0000116 };
117
Sam Parker98722692019-07-01 08:21:28 +0000118 // Search the given block for a loop start instruction. If one isn't found,
119 // and there's only one predecessor block, search that one too.
120 std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
121 [&IsLoopStart, &SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
Sam Parkera6fd9192019-06-25 10:45:51 +0000122 for (auto &MI : *MBB) {
123 if (IsLoopStart(MI))
124 return &MI;
125 }
Sam Parker98722692019-07-01 08:21:28 +0000126 if (MBB->pred_size() == 1)
127 return SearchForStart(*MBB->pred_begin());
Sam Parkera6fd9192019-06-25 10:45:51 +0000128 return nullptr;
129 };
130
131 MachineInstr *Start = nullptr;
132 MachineInstr *Dec = nullptr;
133 MachineInstr *End = nullptr;
134 bool Revert = false;
135
Sam Parker98722692019-07-01 08:21:28 +0000136 // Search the preheader for the start intrinsic, or look through the
137 // predecessors of the header to find exactly one set.iterations intrinsic.
138 // FIXME: I don't see why we shouldn't be supporting multiple predecessors
139 // with potentially multiple set.loop.iterations, so we need to enable this.
140 if (auto *Preheader = ML->getLoopPreheader()) {
Sam Parkera6fd9192019-06-25 10:45:51 +0000141 Start = SearchForStart(Preheader);
Sam Parker98722692019-07-01 08:21:28 +0000142 } else {
143 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
144 << " - Performing manual predecessor search.\n");
145 MachineBasicBlock *Pred = nullptr;
146 for (auto *MBB : ML->getHeader()->predecessors()) {
147 if (!ML->contains(MBB)) {
148 if (Pred) {
149 LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
150 Start = nullptr;
151 break;
152 }
153 Pred = MBB;
154 Start = SearchForStart(MBB);
155 }
156 }
157 }
Sam Parkera6fd9192019-06-25 10:45:51 +0000158
159 // Find the low-overhead loop components and decide whether or not to fall
160 // back to a normal loop.
161 for (auto *MBB : reverse(ML->getBlocks())) {
162 for (auto &MI : *MBB) {
163 if (MI.getOpcode() == ARM::t2LoopDec)
164 Dec = &MI;
165 else if (MI.getOpcode() == ARM::t2LoopEnd)
166 End = &MI;
Sam Parkerbcf0eb72019-06-25 15:11:17 +0000167 else if (MI.getDesc().isCall())
168 // TODO: Though the call will require LE to execute again, does this
169 // mean we should revert? Always executing LE hopefully should be
170 // faster than performing a sub,cmp,br or even subs,br.
171 Revert = true;
Sam Parkera6fd9192019-06-25 10:45:51 +0000172
173 if (!Dec)
174 continue;
175
Sam Parkera6fd9192019-06-25 10:45:51 +0000176 // If we find that we load/store LR between LoopDec and LoopEnd, expect
177 // that the decremented value has been spilled to the stack. Because
178 // this value isn't actually going to be produced until the latch, by LE,
179 // we would need to generate a real sub. The value is also likely to be
180 // reloaded for use of LoopEnd - in which in case we'd need to perform
181 // an add because it gets negated again by LE! The other option is to
182 // then generate the other form of LE which doesn't perform the sub.
183 if (MI.mayLoad() || MI.mayStore())
184 Revert =
185 MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR;
186 }
187
188 if (Dec && End && Revert)
189 break;
190 }
191
Sam Parker98722692019-07-01 08:21:28 +0000192 if (!Start && !Dec && !End) {
Sam Parkera6fd9192019-06-25 10:45:51 +0000193 LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
194 return Changed;
Sam Parker98722692019-07-01 08:21:28 +0000195 } if (!(Start && Dec && End)) {
196 report_fatal_error("Failed to find all loop components");
Sam Parkera6fd9192019-06-25 10:45:51 +0000197 }
198
199 if (!End->getOperand(1).isMBB() ||
200 End->getOperand(1).getMBB() != ML->getHeader())
201 report_fatal_error("Expected LoopEnd to target Loop Header");
202
203 // The LE instructions has 12-bits for the label offset.
204 if (!BBUtils->isBBInRange(End, ML->getHeader(), 4096)) {
205 LLVM_DEBUG(dbgs() << "ARM Loops: Too large for a low-overhead loop!\n");
206 Revert = true;
207 }
208
209 LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start
210 << " - Found Loop Dec: " << *Dec
211 << " - Found Loop End: " << *End);
212
213 Expand(ML, Start, Dec, End, Revert);
214 return true;
215}
216
Sam Parker775b2f52019-07-10 12:29:43 +0000217// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
218// beq that branches to the exit branch.
219// FIXME: Need to check that we're not trashing the CPSR when generating the
220// cmp. We could also try to generate a cbz if the value in LR is also in
221// another low register.
222void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
223 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
224 MachineBasicBlock *MBB = MI->getParent();
225 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
226 TII->get(ARM::t2CMPri));
227 MIB.addReg(ARM::LR);
228 MIB.addImm(0);
229 MIB.addImm(ARMCC::AL);
230 MIB.addReg(ARM::CPSR);
231
232 // TODO: Try to use tBcc instead
233 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
234 MIB.add(MI->getOperand(1)); // branch target
235 MIB.addImm(ARMCC::EQ); // condition code
236 MIB.addReg(ARM::CPSR);
237 MI->eraseFromParent();
238}
239
240// TODO: Check flags so that we can possibly generate a tSubs or tSub.
241void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
242 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
243 MachineBasicBlock *MBB = MI->getParent();
244 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
245 TII->get(ARM::t2SUBri));
246 MIB.addDef(ARM::LR);
247 MIB.add(MI->getOperand(1));
248 MIB.add(MI->getOperand(2));
249 MIB.addImm(ARMCC::AL);
250 MIB.addReg(0);
251 MIB.addReg(0);
252 MI->eraseFromParent();
253}
254
255// Generate a subs, or sub and cmp, and a branch instead of an LE.
256// FIXME: Need to check that we're not trashing the CPSR when generating
257// the cmp.
258void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const {
259 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
260
261 // Create cmp
262 MachineBasicBlock *MBB = MI->getParent();
263 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
264 TII->get(ARM::t2CMPri));
265 MIB.addReg(ARM::LR);
266 MIB.addImm(0);
267 MIB.addImm(ARMCC::AL);
268 MIB.addReg(ARM::CPSR);
269
270 // TODO Try to use tBcc instead.
271 // Create bne
272 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
273 MIB.add(MI->getOperand(1)); // branch target
274 MIB.addImm(ARMCC::NE); // condition code
275 MIB.addReg(ARM::CPSR);
276 MI->eraseFromParent();
277}
278
Sam Parkera6fd9192019-06-25 10:45:51 +0000279void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
280 MachineInstr *Dec, MachineInstr *End,
281 bool Revert) {
282
283 auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
284 // The trip count should already been held in LR since the instructions
285 // within the loop can only read and write to LR. So, there should be a
286 // mov to setup the count. WLS/DLS perform this move, so find the original
287 // and delete it - inserting WLS/DLS in its place.
288 MachineBasicBlock *MBB = Start->getParent();
289 MachineInstr *InsertPt = Start;
290 for (auto &I : MRI->def_instructions(ARM::LR)) {
291 if (I.getParent() != MBB)
292 continue;
293
294 // Always execute.
295 if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL)
296 continue;
297
298 // Only handle move reg, if the trip count it will need moving into a reg
299 // before the setup instruction anyway.
300 if (!I.getDesc().isMoveReg() ||
301 !I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
302 continue;
303 InsertPt = &I;
304 break;
305 }
306
Sam Parker98722692019-07-01 08:21:28 +0000307 unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
308 ARM::t2DLS : ARM::t2WLS;
Sam Parkera6fd9192019-06-25 10:45:51 +0000309 MachineInstrBuilder MIB =
Sam Parker98722692019-07-01 08:21:28 +0000310 BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
Sam Parkera6fd9192019-06-25 10:45:51 +0000311
312 MIB.addDef(ARM::LR);
313 MIB.add(Start->getOperand(0));
Sam Parker98722692019-07-01 08:21:28 +0000314 if (Opc == ARM::t2WLS)
315 MIB.add(Start->getOperand(1));
316
317 if (InsertPt != Start)
318 InsertPt->eraseFromParent();
Sam Parkera6fd9192019-06-25 10:45:51 +0000319 Start->eraseFromParent();
Sam Parker98722692019-07-01 08:21:28 +0000320 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
321 return &*MIB;
Sam Parkera6fd9192019-06-25 10:45:51 +0000322 };
323
324 // Combine the LoopDec and LoopEnd instructions into LE(TP).
325 auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec,
326 MachineInstr *End) {
327 MachineBasicBlock *MBB = End->getParent();
328 MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
329 TII->get(ARM::t2LEUpdate));
330 MIB.addDef(ARM::LR);
331 MIB.add(End->getOperand(0));
332 MIB.add(End->getOperand(1));
333 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
334
Sam Parkera6fd9192019-06-25 10:45:51 +0000335 End->eraseFromParent();
336 Dec->eraseFromParent();
Sam Parker98722692019-07-01 08:21:28 +0000337 return &*MIB;
Sam Parkera6fd9192019-06-25 10:45:51 +0000338 };
339
Sam Parker98722692019-07-01 08:21:28 +0000340 // TODO: We should be able to automatically remove these branches before we
341 // get here - probably by teaching analyzeBranch about the pseudo
342 // instructions.
343 // If there is an unconditional branch, after I, that just branches to the
344 // next block, remove it.
345 auto RemoveDeadBranch = [](MachineInstr *I) {
346 MachineBasicBlock *BB = I->getParent();
347 MachineInstr *Terminator = &BB->instr_back();
348 if (Terminator->isUnconditionalBranch() && I != Terminator) {
349 MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB();
350 if (BB->isLayoutSuccessor(Succ)) {
351 LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);
352 Terminator->eraseFromParent();
353 }
354 }
355 };
356
Sam Parkera6fd9192019-06-25 10:45:51 +0000357 if (Revert) {
Sam Parker98722692019-07-01 08:21:28 +0000358 if (Start->getOpcode() == ARM::t2WhileLoopStart)
Sam Parker775b2f52019-07-10 12:29:43 +0000359 RevertWhile(Start);
360 else
361 Start->eraseFromParent();
362 RevertLoopDec(Dec);
363 RevertLoopEnd(End);
Sam Parkera6fd9192019-06-25 10:45:51 +0000364 } else {
Sam Parker98722692019-07-01 08:21:28 +0000365 Start = ExpandLoopStart(ML, Start);
366 RemoveDeadBranch(Start);
367 End = ExpandLoopEnd(ML, Dec, End);
368 RemoveDeadBranch(End);
Sam Parkera6fd9192019-06-25 10:45:51 +0000369 }
370}
371
372FunctionPass *llvm::createARMLowOverheadLoopsPass() {
373 return new ARMLowOverheadLoops();
374}