blob: ecac67a2f3138219171c19138e990727692e283c [file] [log] [blame]
Sam Parkera6fd9192019-06-25 10:45:51 +00001//===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10/// instructions into machine operations.
11/// The expectation is that the loop contains three pseudo instructions:
12/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13/// form should be in the preheader, whereas the while form should be in the
14/// preheaders only predecessor. TODO: Could DoLoopStart get moved into the
15/// pre-preheader?
16/// - t2LoopDec - placed within in the loop body.
17/// - t2LoopEnd - the loop latch terminator.
18///
19//===----------------------------------------------------------------------===//
20
21#include "ARM.h"
22#include "ARMBaseInstrInfo.h"
23#include "ARMBaseRegisterInfo.h"
24#include "ARMBasicBlockInfo.h"
25#include "ARMSubtarget.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineLoopInfo.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29
30using namespace llvm;
31
32#define DEBUG_TYPE "arm-low-overhead-loops"
33#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
34
35namespace {
36
37 class ARMLowOverheadLoops : public MachineFunctionPass {
38 const ARMBaseInstrInfo *TII = nullptr;
39 MachineRegisterInfo *MRI = nullptr;
40 std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
41
42 public:
43 static char ID;
44
45 ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
46
47 void getAnalysisUsage(AnalysisUsage &AU) const override {
48 AU.setPreservesCFG();
49 AU.addRequired<MachineLoopInfo>();
50 MachineFunctionPass::getAnalysisUsage(AU);
51 }
52
53 bool runOnMachineFunction(MachineFunction &MF) override;
54
55 bool ProcessLoop(MachineLoop *ML);
56
57 void Expand(MachineLoop *ML, MachineInstr *Start,
58 MachineInstr *Dec, MachineInstr *End, bool Revert);
59
60 MachineFunctionProperties getRequiredProperties() const override {
61 return MachineFunctionProperties().set(
62 MachineFunctionProperties::Property::NoVRegs);
63 }
64
65 StringRef getPassName() const override {
66 return ARM_LOW_OVERHEAD_LOOPS_NAME;
67 }
68 };
69}
70
71char ARMLowOverheadLoops::ID = 0;
72
73INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
74 false, false)
75
76bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
Sam Parkerbcf0eb72019-06-25 15:11:17 +000077 if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
78 return false;
Sam Parkera6fd9192019-06-25 10:45:51 +000079
80 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
81
82 auto &MLI = getAnalysis<MachineLoopInfo>();
83 MRI = &MF.getRegInfo();
84 TII = static_cast<const ARMBaseInstrInfo*>(
85 MF.getSubtarget().getInstrInfo());
86 BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
87 BBUtils->computeAllBlockSizes();
88
89 bool Changed = false;
90 for (auto ML : MLI) {
91 if (!ML->getParentLoop())
92 Changed |= ProcessLoop(ML);
93 }
94 return Changed;
95}
96
97bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
98
99 bool Changed = false;
100
101 // Process inner loops first.
102 for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
103 Changed |= ProcessLoop(*I);
104
105 LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
106
107 auto IsLoopStart = [](MachineInstr &MI) {
Sam Parker98722692019-07-01 08:21:28 +0000108 return MI.getOpcode() == ARM::t2DoLoopStart ||
109 MI.getOpcode() == ARM::t2WhileLoopStart;
Sam Parkera6fd9192019-06-25 10:45:51 +0000110 };
111
Sam Parker98722692019-07-01 08:21:28 +0000112 // Search the given block for a loop start instruction. If one isn't found,
113 // and there's only one predecessor block, search that one too.
114 std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
115 [&IsLoopStart, &SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
Sam Parkera6fd9192019-06-25 10:45:51 +0000116 for (auto &MI : *MBB) {
117 if (IsLoopStart(MI))
118 return &MI;
119 }
Sam Parker98722692019-07-01 08:21:28 +0000120 if (MBB->pred_size() == 1)
121 return SearchForStart(*MBB->pred_begin());
Sam Parkera6fd9192019-06-25 10:45:51 +0000122 return nullptr;
123 };
124
125 MachineInstr *Start = nullptr;
126 MachineInstr *Dec = nullptr;
127 MachineInstr *End = nullptr;
128 bool Revert = false;
129
Sam Parker98722692019-07-01 08:21:28 +0000130 // Search the preheader for the start intrinsic, or look through the
131 // predecessors of the header to find exactly one set.iterations intrinsic.
132 // FIXME: I don't see why we shouldn't be supporting multiple predecessors
133 // with potentially multiple set.loop.iterations, so we need to enable this.
134 if (auto *Preheader = ML->getLoopPreheader()) {
Sam Parkera6fd9192019-06-25 10:45:51 +0000135 Start = SearchForStart(Preheader);
Sam Parker98722692019-07-01 08:21:28 +0000136 } else {
137 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
138 << " - Performing manual predecessor search.\n");
139 MachineBasicBlock *Pred = nullptr;
140 for (auto *MBB : ML->getHeader()->predecessors()) {
141 if (!ML->contains(MBB)) {
142 if (Pred) {
143 LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
144 Start = nullptr;
145 break;
146 }
147 Pred = MBB;
148 Start = SearchForStart(MBB);
149 }
150 }
151 }
Sam Parkera6fd9192019-06-25 10:45:51 +0000152
153 // Find the low-overhead loop components and decide whether or not to fall
154 // back to a normal loop.
155 for (auto *MBB : reverse(ML->getBlocks())) {
156 for (auto &MI : *MBB) {
157 if (MI.getOpcode() == ARM::t2LoopDec)
158 Dec = &MI;
159 else if (MI.getOpcode() == ARM::t2LoopEnd)
160 End = &MI;
Sam Parkerbcf0eb72019-06-25 15:11:17 +0000161 else if (MI.getDesc().isCall())
162 // TODO: Though the call will require LE to execute again, does this
163 // mean we should revert? Always executing LE hopefully should be
164 // faster than performing a sub,cmp,br or even subs,br.
165 Revert = true;
Sam Parkera6fd9192019-06-25 10:45:51 +0000166
167 if (!Dec)
168 continue;
169
Sam Parkera6fd9192019-06-25 10:45:51 +0000170 // If we find that we load/store LR between LoopDec and LoopEnd, expect
171 // that the decremented value has been spilled to the stack. Because
172 // this value isn't actually going to be produced until the latch, by LE,
173 // we would need to generate a real sub. The value is also likely to be
174 // reloaded for use of LoopEnd - in which in case we'd need to perform
175 // an add because it gets negated again by LE! The other option is to
176 // then generate the other form of LE which doesn't perform the sub.
177 if (MI.mayLoad() || MI.mayStore())
178 Revert =
179 MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR;
180 }
181
182 if (Dec && End && Revert)
183 break;
184 }
185
Sam Parker98722692019-07-01 08:21:28 +0000186 if (!Start && !Dec && !End) {
Sam Parkera6fd9192019-06-25 10:45:51 +0000187 LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
188 return Changed;
Sam Parker98722692019-07-01 08:21:28 +0000189 } if (!(Start && Dec && End)) {
190 report_fatal_error("Failed to find all loop components");
Sam Parkera6fd9192019-06-25 10:45:51 +0000191 }
192
193 if (!End->getOperand(1).isMBB() ||
194 End->getOperand(1).getMBB() != ML->getHeader())
195 report_fatal_error("Expected LoopEnd to target Loop Header");
196
197 // The LE instructions has 12-bits for the label offset.
198 if (!BBUtils->isBBInRange(End, ML->getHeader(), 4096)) {
199 LLVM_DEBUG(dbgs() << "ARM Loops: Too large for a low-overhead loop!\n");
200 Revert = true;
201 }
202
203 LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start
204 << " - Found Loop Dec: " << *Dec
205 << " - Found Loop End: " << *End);
206
207 Expand(ML, Start, Dec, End, Revert);
208 return true;
209}
210
211void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
212 MachineInstr *Dec, MachineInstr *End,
213 bool Revert) {
214
215 auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
216 // The trip count should already been held in LR since the instructions
217 // within the loop can only read and write to LR. So, there should be a
218 // mov to setup the count. WLS/DLS perform this move, so find the original
219 // and delete it - inserting WLS/DLS in its place.
220 MachineBasicBlock *MBB = Start->getParent();
221 MachineInstr *InsertPt = Start;
222 for (auto &I : MRI->def_instructions(ARM::LR)) {
223 if (I.getParent() != MBB)
224 continue;
225
226 // Always execute.
227 if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL)
228 continue;
229
230 // Only handle move reg, if the trip count it will need moving into a reg
231 // before the setup instruction anyway.
232 if (!I.getDesc().isMoveReg() ||
233 !I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
234 continue;
235 InsertPt = &I;
236 break;
237 }
238
Sam Parker98722692019-07-01 08:21:28 +0000239 unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
240 ARM::t2DLS : ARM::t2WLS;
Sam Parkera6fd9192019-06-25 10:45:51 +0000241 MachineInstrBuilder MIB =
Sam Parker98722692019-07-01 08:21:28 +0000242 BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
Sam Parkera6fd9192019-06-25 10:45:51 +0000243
244 MIB.addDef(ARM::LR);
245 MIB.add(Start->getOperand(0));
Sam Parker98722692019-07-01 08:21:28 +0000246 if (Opc == ARM::t2WLS)
247 MIB.add(Start->getOperand(1));
248
249 if (InsertPt != Start)
250 InsertPt->eraseFromParent();
Sam Parkera6fd9192019-06-25 10:45:51 +0000251 Start->eraseFromParent();
Sam Parker98722692019-07-01 08:21:28 +0000252 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
253 return &*MIB;
Sam Parkera6fd9192019-06-25 10:45:51 +0000254 };
255
256 // Combine the LoopDec and LoopEnd instructions into LE(TP).
257 auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec,
258 MachineInstr *End) {
259 MachineBasicBlock *MBB = End->getParent();
260 MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
261 TII->get(ARM::t2LEUpdate));
262 MIB.addDef(ARM::LR);
263 MIB.add(End->getOperand(0));
264 MIB.add(End->getOperand(1));
265 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
266
Sam Parkera6fd9192019-06-25 10:45:51 +0000267 End->eraseFromParent();
268 Dec->eraseFromParent();
Sam Parker98722692019-07-01 08:21:28 +0000269 return &*MIB;
Sam Parkera6fd9192019-06-25 10:45:51 +0000270 };
271
272 // Generate a subs, or sub and cmp, and a branch instead of an LE.
273 // TODO: Check flags so that we can possibly generate a subs.
Sam Parker98722692019-07-01 08:21:28 +0000274 // FIXME: Need to check that we're not trashing the CPSR when generating
275 // the cmp.
Sam Parkera6fd9192019-06-25 10:45:51 +0000276 auto ExpandBranch = [this](MachineInstr *Dec, MachineInstr *End) {
277 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub, cmp, br.\n");
278 // Create sub
279 MachineBasicBlock *MBB = Dec->getParent();
280 MachineInstrBuilder MIB = BuildMI(*MBB, Dec, Dec->getDebugLoc(),
281 TII->get(ARM::t2SUBri));
282 MIB.addDef(ARM::LR);
283 MIB.add(Dec->getOperand(1));
284 MIB.add(Dec->getOperand(2));
285 MIB.addImm(ARMCC::AL);
286 MIB.addReg(0);
287 MIB.addReg(0);
288
289 // Create cmp
290 MBB = End->getParent();
291 MIB = BuildMI(*MBB, End, End->getDebugLoc(), TII->get(ARM::t2CMPri));
292 MIB.addReg(ARM::LR);
293 MIB.addImm(0);
294 MIB.addImm(ARMCC::AL);
Sam Parkerbcf0eb72019-06-25 15:11:17 +0000295 MIB.addReg(ARM::CPSR);
Sam Parkera6fd9192019-06-25 10:45:51 +0000296
297 // Create bne
298 MIB = BuildMI(*MBB, End, End->getDebugLoc(), TII->get(ARM::t2Bcc));
299 MIB.add(End->getOperand(1)); // branch target
300 MIB.addImm(ARMCC::NE); // condition code
Sam Parkerbcf0eb72019-06-25 15:11:17 +0000301 MIB.addReg(ARM::CPSR);
Sam Parkera6fd9192019-06-25 10:45:51 +0000302 End->eraseFromParent();
303 Dec->eraseFromParent();
304 };
305
Sam Parker98722692019-07-01 08:21:28 +0000306 // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
307 // beq that branches to the exit branch.
308 // FIXME: Need to check that we're not trashing the CPSR when generating the
309 // cmp. We could also try to generate a cbz if the value in LR is also in
310 // another low register.
311 auto ExpandStart = [this](MachineInstr *MI) {
312 MachineBasicBlock *MBB = MI->getParent();
313 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
314 TII->get(ARM::t2CMPri));
315 MIB.addReg(ARM::LR);
316 MIB.addImm(0);
317 MIB.addImm(ARMCC::AL);
318 MIB.addReg(ARM::CPSR);
319
320 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
321 MIB.add(MI->getOperand(1)); // branch target
322 MIB.addImm(ARMCC::EQ); // condition code
323 MIB.addReg(ARM::CPSR);
324 };
325
326 // TODO: We should be able to automatically remove these branches before we
327 // get here - probably by teaching analyzeBranch about the pseudo
328 // instructions.
329 // If there is an unconditional branch, after I, that just branches to the
330 // next block, remove it.
331 auto RemoveDeadBranch = [](MachineInstr *I) {
332 MachineBasicBlock *BB = I->getParent();
333 MachineInstr *Terminator = &BB->instr_back();
334 if (Terminator->isUnconditionalBranch() && I != Terminator) {
335 MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB();
336 if (BB->isLayoutSuccessor(Succ)) {
337 LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);
338 Terminator->eraseFromParent();
339 }
340 }
341 };
342
Sam Parkera6fd9192019-06-25 10:45:51 +0000343 if (Revert) {
Sam Parker98722692019-07-01 08:21:28 +0000344 if (Start->getOpcode() == ARM::t2WhileLoopStart)
345 ExpandStart(Start);
Sam Parkera6fd9192019-06-25 10:45:51 +0000346 ExpandBranch(Dec, End);
Sam Parker98722692019-07-01 08:21:28 +0000347 Start->eraseFromParent();
Sam Parkera6fd9192019-06-25 10:45:51 +0000348 } else {
Sam Parker98722692019-07-01 08:21:28 +0000349 Start = ExpandLoopStart(ML, Start);
350 RemoveDeadBranch(Start);
351 End = ExpandLoopEnd(ML, Dec, End);
352 RemoveDeadBranch(End);
Sam Parkera6fd9192019-06-25 10:45:51 +0000353 }
354}
355
356FunctionPass *llvm::createARMLowOverheadLoopsPass() {
357 return new ARMLowOverheadLoops();
358}