blob: cfd1f40871077f425392701b3546f3bf9a82fe2e [file] [log] [blame]
Sam Parkera6fd9192019-06-25 10:45:51 +00001//===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10/// instructions into machine operations.
11/// The expectation is that the loop contains three pseudo instructions:
12/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13/// form should be in the preheader, whereas the while form should be in the
Sam Parker173de032019-08-07 07:39:19 +000014/// preheaders only predecessor.
Sam Parkera6fd9192019-06-25 10:45:51 +000015/// - t2LoopDec - placed within in the loop body.
16/// - t2LoopEnd - the loop latch terminator.
17///
18//===----------------------------------------------------------------------===//
19
20#include "ARM.h"
21#include "ARMBaseInstrInfo.h"
22#include "ARMBaseRegisterInfo.h"
23#include "ARMBasicBlockInfo.h"
24#include "ARMSubtarget.h"
25#include "llvm/CodeGen/MachineFunctionPass.h"
26#include "llvm/CodeGen/MachineLoopInfo.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "arm-low-overhead-loops"
32#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
33
34namespace {
35
36 class ARMLowOverheadLoops : public MachineFunctionPass {
37 const ARMBaseInstrInfo *TII = nullptr;
38 MachineRegisterInfo *MRI = nullptr;
39 std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
40
41 public:
42 static char ID;
43
44 ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
45
46 void getAnalysisUsage(AnalysisUsage &AU) const override {
47 AU.setPreservesCFG();
48 AU.addRequired<MachineLoopInfo>();
49 MachineFunctionPass::getAnalysisUsage(AU);
50 }
51
52 bool runOnMachineFunction(MachineFunction &MF) override;
53
54 bool ProcessLoop(MachineLoop *ML);
55
Sam Parker4379a402019-07-22 14:16:40 +000056 bool RevertNonLoops(MachineFunction &MF);
57
Sam Parker775b2f52019-07-10 12:29:43 +000058 void RevertWhile(MachineInstr *MI) const;
59
60 void RevertLoopDec(MachineInstr *MI) const;
61
62 void RevertLoopEnd(MachineInstr *MI) const;
63
Sam Parkera6fd9192019-06-25 10:45:51 +000064 void Expand(MachineLoop *ML, MachineInstr *Start,
65 MachineInstr *Dec, MachineInstr *End, bool Revert);
66
67 MachineFunctionProperties getRequiredProperties() const override {
68 return MachineFunctionProperties().set(
69 MachineFunctionProperties::Property::NoVRegs);
70 }
71
72 StringRef getPassName() const override {
73 return ARM_LOW_OVERHEAD_LOOPS_NAME;
74 }
75 };
76}
Sjoerd Meijera19f5a72019-07-24 13:30:36 +000077
Sam Parkera6fd9192019-06-25 10:45:51 +000078char ARMLowOverheadLoops::ID = 0;
79
80INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
81 false, false)
82
83bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
Sam Parkerbcf0eb72019-06-25 15:11:17 +000084 if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
85 return false;
Sam Parkera6fd9192019-06-25 10:45:51 +000086
87 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
88
89 auto &MLI = getAnalysis<MachineLoopInfo>();
90 MRI = &MF.getRegInfo();
91 TII = static_cast<const ARMBaseInstrInfo*>(
92 MF.getSubtarget().getInstrInfo());
93 BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
94 BBUtils->computeAllBlockSizes();
Sam Parker08b4a8d2019-07-11 09:56:15 +000095 BBUtils->adjustBBOffsetsAfter(&MF.front());
Sam Parkera6fd9192019-06-25 10:45:51 +000096
97 bool Changed = false;
98 for (auto ML : MLI) {
99 if (!ML->getParentLoop())
100 Changed |= ProcessLoop(ML);
101 }
Sam Parker4379a402019-07-22 14:16:40 +0000102 Changed |= RevertNonLoops(MF);
Sam Parkera6fd9192019-06-25 10:45:51 +0000103 return Changed;
104}
105
Sam Parker4379a402019-07-22 14:16:40 +0000106static bool IsLoopStart(MachineInstr &MI) {
107 return MI.getOpcode() == ARM::t2DoLoopStart ||
108 MI.getOpcode() == ARM::t2WhileLoopStart;
109}
110
Sam Parkera6fd9192019-06-25 10:45:51 +0000111bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
112
113 bool Changed = false;
114
115 // Process inner loops first.
116 for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
117 Changed |= ProcessLoop(*I);
118
119 LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
120
Sam Parker98722692019-07-01 08:21:28 +0000121 // Search the given block for a loop start instruction. If one isn't found,
122 // and there's only one predecessor block, search that one too.
123 std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
Sam Parker4379a402019-07-22 14:16:40 +0000124 [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
Sam Parkera6fd9192019-06-25 10:45:51 +0000125 for (auto &MI : *MBB) {
126 if (IsLoopStart(MI))
127 return &MI;
128 }
Sam Parker98722692019-07-01 08:21:28 +0000129 if (MBB->pred_size() == 1)
130 return SearchForStart(*MBB->pred_begin());
Sam Parkera6fd9192019-06-25 10:45:51 +0000131 return nullptr;
132 };
133
134 MachineInstr *Start = nullptr;
135 MachineInstr *Dec = nullptr;
136 MachineInstr *End = nullptr;
137 bool Revert = false;
138
Sam Parker98722692019-07-01 08:21:28 +0000139 // Search the preheader for the start intrinsic, or look through the
140 // predecessors of the header to find exactly one set.iterations intrinsic.
141 // FIXME: I don't see why we shouldn't be supporting multiple predecessors
142 // with potentially multiple set.loop.iterations, so we need to enable this.
143 if (auto *Preheader = ML->getLoopPreheader()) {
Sam Parkera6fd9192019-06-25 10:45:51 +0000144 Start = SearchForStart(Preheader);
Sam Parker98722692019-07-01 08:21:28 +0000145 } else {
146 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
147 << " - Performing manual predecessor search.\n");
148 MachineBasicBlock *Pred = nullptr;
149 for (auto *MBB : ML->getHeader()->predecessors()) {
150 if (!ML->contains(MBB)) {
151 if (Pred) {
152 LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
153 Start = nullptr;
154 break;
155 }
156 Pred = MBB;
157 Start = SearchForStart(MBB);
158 }
159 }
160 }
Sam Parkera6fd9192019-06-25 10:45:51 +0000161
162 // Find the low-overhead loop components and decide whether or not to fall
163 // back to a normal loop.
164 for (auto *MBB : reverse(ML->getBlocks())) {
165 for (auto &MI : *MBB) {
166 if (MI.getOpcode() == ARM::t2LoopDec)
167 Dec = &MI;
168 else if (MI.getOpcode() == ARM::t2LoopEnd)
169 End = &MI;
Sam Parker4379a402019-07-22 14:16:40 +0000170 else if (IsLoopStart(MI))
171 Start = &MI;
Sam Parkerbcf0eb72019-06-25 15:11:17 +0000172 else if (MI.getDesc().isCall())
173 // TODO: Though the call will require LE to execute again, does this
174 // mean we should revert? Always executing LE hopefully should be
175 // faster than performing a sub,cmp,br or even subs,br.
176 Revert = true;
Sam Parkera6fd9192019-06-25 10:45:51 +0000177
Sam Parker173de032019-08-07 07:39:19 +0000178 if (!Dec || End)
Sam Parkera6fd9192019-06-25 10:45:51 +0000179 continue;
180
Sam Parker173de032019-08-07 07:39:19 +0000181 // If we find that LR has been written or read between LoopDec and
182 // LoopEnd, expect that the decremented value is being used else where.
183 // Because this value isn't actually going to be produced until the
184 // latch, by LE, we would need to generate a real sub. The value is also
185 // likely to be copied/reloaded for use of LoopEnd - in which in case
186 // we'd need to perform an add because it gets subtracted again by LE!
187 // The other option is to then generate the other form of LE which doesn't
188 // perform the sub.
189 for (auto &MO : MI.operands()) {
190 if (MI.getOpcode() != ARM::t2LoopDec && MO.isReg() &&
191 MO.getReg() == ARM::LR) {
192 LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI);
193 Revert = true;
194 break;
195 }
196 }
Sam Parkera6fd9192019-06-25 10:45:51 +0000197 }
198
199 if (Dec && End && Revert)
200 break;
201 }
202
Sam Parker4379a402019-07-22 14:16:40 +0000203 LLVM_DEBUG(if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
204 if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
205 if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;);
206
Sam Parker98722692019-07-01 08:21:28 +0000207 if (!Start && !Dec && !End) {
Sam Parkera6fd9192019-06-25 10:45:51 +0000208 LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
209 return Changed;
Sam Parker4379a402019-07-22 14:16:40 +0000210 } else if (!(Start && Dec && End)) {
211 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n");
212 return false;
Sam Parkera6fd9192019-06-25 10:45:51 +0000213 }
214
Sam Parkered2ea3e2019-07-30 08:08:44 +0000215 if (!End->getOperand(1).isMBB())
216 report_fatal_error("Expected LoopEnd to target basic block");
217
218 // TODO Maybe there's cases where the target doesn't have to be the header,
219 // but for now be safe and revert.
220 if (End->getOperand(1).getMBB() != ML->getHeader()) {
221 LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
222 Revert = true;
223 }
Sam Parkera6fd9192019-06-25 10:45:51 +0000224
Sam Parker08b4a8d2019-07-11 09:56:15 +0000225 // The WLS and LE instructions have 12-bits for the label offset. WLS
226 // requires a positive offset, while LE uses negative.
227 if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML->getHeader()) ||
228 !BBUtils->isBBInRange(End, ML->getHeader(), 4094)) {
229 LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
230 Revert = true;
231 }
232 if (Start->getOpcode() == ARM::t2WhileLoopStart &&
233 (BBUtils->getOffsetOf(Start) >
234 BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
235 !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
236 LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
Sam Parkera6fd9192019-06-25 10:45:51 +0000237 Revert = true;
238 }
239
Sam Parkera6fd9192019-06-25 10:45:51 +0000240 Expand(ML, Start, Dec, End, Revert);
241 return true;
242}
243
Sam Parker775b2f52019-07-10 12:29:43 +0000244// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
245// beq that branches to the exit branch.
246// FIXME: Need to check that we're not trashing the CPSR when generating the
247// cmp. We could also try to generate a cbz if the value in LR is also in
248// another low register.
249void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
250 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
251 MachineBasicBlock *MBB = MI->getParent();
252 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
253 TII->get(ARM::t2CMPri));
Eli Friedman9b9a3082019-08-15 23:35:53 +0000254 MIB.add(MI->getOperand(0));
Sam Parker775b2f52019-07-10 12:29:43 +0000255 MIB.addImm(0);
256 MIB.addImm(ARMCC::AL);
Eli Friedman9b9a3082019-08-15 23:35:53 +0000257 MIB.addReg(ARM::NoRegister);
Sam Parker775b2f52019-07-10 12:29:43 +0000258
259 // TODO: Try to use tBcc instead
260 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
261 MIB.add(MI->getOperand(1)); // branch target
262 MIB.addImm(ARMCC::EQ); // condition code
263 MIB.addReg(ARM::CPSR);
264 MI->eraseFromParent();
265}
266
267// TODO: Check flags so that we can possibly generate a tSubs or tSub.
268void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
269 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
270 MachineBasicBlock *MBB = MI->getParent();
271 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
272 TII->get(ARM::t2SUBri));
273 MIB.addDef(ARM::LR);
274 MIB.add(MI->getOperand(1));
275 MIB.add(MI->getOperand(2));
276 MIB.addImm(ARMCC::AL);
277 MIB.addReg(0);
278 MIB.addReg(0);
279 MI->eraseFromParent();
280}
281
282// Generate a subs, or sub and cmp, and a branch instead of an LE.
283// FIXME: Need to check that we're not trashing the CPSR when generating
284// the cmp.
285void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const {
286 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
287
288 // Create cmp
289 MachineBasicBlock *MBB = MI->getParent();
290 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
291 TII->get(ARM::t2CMPri));
292 MIB.addReg(ARM::LR);
293 MIB.addImm(0);
294 MIB.addImm(ARMCC::AL);
Eli Friedman9b9a3082019-08-15 23:35:53 +0000295 MIB.addReg(ARM::NoRegister);
Sam Parker775b2f52019-07-10 12:29:43 +0000296
297 // TODO Try to use tBcc instead.
298 // Create bne
299 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
300 MIB.add(MI->getOperand(1)); // branch target
301 MIB.addImm(ARMCC::NE); // condition code
302 MIB.addReg(ARM::CPSR);
303 MI->eraseFromParent();
304}
305
Sam Parkera6fd9192019-06-25 10:45:51 +0000306void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
307 MachineInstr *Dec, MachineInstr *End,
308 bool Revert) {
309
310 auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
311 // The trip count should already been held in LR since the instructions
312 // within the loop can only read and write to LR. So, there should be a
313 // mov to setup the count. WLS/DLS perform this move, so find the original
314 // and delete it - inserting WLS/DLS in its place.
315 MachineBasicBlock *MBB = Start->getParent();
316 MachineInstr *InsertPt = Start;
317 for (auto &I : MRI->def_instructions(ARM::LR)) {
318 if (I.getParent() != MBB)
319 continue;
320
321 // Always execute.
322 if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL)
323 continue;
324
325 // Only handle move reg, if the trip count it will need moving into a reg
326 // before the setup instruction anyway.
327 if (!I.getDesc().isMoveReg() ||
328 !I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
329 continue;
330 InsertPt = &I;
331 break;
332 }
333
Sam Parker98722692019-07-01 08:21:28 +0000334 unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
335 ARM::t2DLS : ARM::t2WLS;
Sam Parkera6fd9192019-06-25 10:45:51 +0000336 MachineInstrBuilder MIB =
Sam Parker98722692019-07-01 08:21:28 +0000337 BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
Sam Parkera6fd9192019-06-25 10:45:51 +0000338
339 MIB.addDef(ARM::LR);
340 MIB.add(Start->getOperand(0));
Sam Parker98722692019-07-01 08:21:28 +0000341 if (Opc == ARM::t2WLS)
342 MIB.add(Start->getOperand(1));
343
344 if (InsertPt != Start)
345 InsertPt->eraseFromParent();
Sam Parkera6fd9192019-06-25 10:45:51 +0000346 Start->eraseFromParent();
Sam Parker98722692019-07-01 08:21:28 +0000347 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
348 return &*MIB;
Sam Parkera6fd9192019-06-25 10:45:51 +0000349 };
350
351 // Combine the LoopDec and LoopEnd instructions into LE(TP).
352 auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec,
353 MachineInstr *End) {
354 MachineBasicBlock *MBB = End->getParent();
355 MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
356 TII->get(ARM::t2LEUpdate));
357 MIB.addDef(ARM::LR);
358 MIB.add(End->getOperand(0));
359 MIB.add(End->getOperand(1));
360 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
361
Sam Parkera6fd9192019-06-25 10:45:51 +0000362 End->eraseFromParent();
363 Dec->eraseFromParent();
Sam Parker98722692019-07-01 08:21:28 +0000364 return &*MIB;
Sam Parkera6fd9192019-06-25 10:45:51 +0000365 };
366
Sam Parker98722692019-07-01 08:21:28 +0000367 // TODO: We should be able to automatically remove these branches before we
368 // get here - probably by teaching analyzeBranch about the pseudo
369 // instructions.
370 // If there is an unconditional branch, after I, that just branches to the
371 // next block, remove it.
372 auto RemoveDeadBranch = [](MachineInstr *I) {
373 MachineBasicBlock *BB = I->getParent();
374 MachineInstr *Terminator = &BB->instr_back();
375 if (Terminator->isUnconditionalBranch() && I != Terminator) {
376 MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB();
377 if (BB->isLayoutSuccessor(Succ)) {
378 LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);
379 Terminator->eraseFromParent();
380 }
381 }
382 };
383
Sam Parkera6fd9192019-06-25 10:45:51 +0000384 if (Revert) {
Sam Parker98722692019-07-01 08:21:28 +0000385 if (Start->getOpcode() == ARM::t2WhileLoopStart)
Sam Parker775b2f52019-07-10 12:29:43 +0000386 RevertWhile(Start);
387 else
388 Start->eraseFromParent();
389 RevertLoopDec(Dec);
390 RevertLoopEnd(End);
Sam Parkera6fd9192019-06-25 10:45:51 +0000391 } else {
Sam Parker98722692019-07-01 08:21:28 +0000392 Start = ExpandLoopStart(ML, Start);
393 RemoveDeadBranch(Start);
394 End = ExpandLoopEnd(ML, Dec, End);
395 RemoveDeadBranch(End);
Sam Parkera6fd9192019-06-25 10:45:51 +0000396 }
397}
398
Sam Parker4379a402019-07-22 14:16:40 +0000399bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction &MF) {
400 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
401 bool Changed = false;
402
403 for (auto &MBB : MF) {
404 SmallVector<MachineInstr*, 4> Starts;
405 SmallVector<MachineInstr*, 4> Decs;
406 SmallVector<MachineInstr*, 4> Ends;
407
408 for (auto &I : MBB) {
409 if (IsLoopStart(I))
410 Starts.push_back(&I);
411 else if (I.getOpcode() == ARM::t2LoopDec)
412 Decs.push_back(&I);
413 else if (I.getOpcode() == ARM::t2LoopEnd)
414 Ends.push_back(&I);
415 }
416
417 if (Starts.empty() && Decs.empty() && Ends.empty())
418 continue;
419
420 Changed = true;
421
422 for (auto *Start : Starts) {
423 if (Start->getOpcode() == ARM::t2WhileLoopStart)
424 RevertWhile(Start);
425 else
426 Start->eraseFromParent();
427 }
428 for (auto *Dec : Decs)
429 RevertLoopDec(Dec);
430
431 for (auto *End : Ends)
432 RevertLoopEnd(End);
433 }
434 return Changed;
435}
436
Sam Parkera6fd9192019-06-25 10:45:51 +0000437FunctionPass *llvm::createARMLowOverheadLoopsPass() {
438 return new ARMLowOverheadLoops();
439}