blob: 787da7542681c00b717668bf16ad650b7d2fe9ff [file] [log] [blame]
Sam Parkera6fd9192019-06-25 10:45:51 +00001//===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10/// instructions into machine operations.
11/// The expectation is that the loop contains three pseudo instructions:
12/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13/// form should be in the preheader, whereas the while form should be in the
14/// preheaders only predecessor. TODO: Could DoLoopStart get moved into the
15/// pre-preheader?
16/// - t2LoopDec - placed within in the loop body.
17/// - t2LoopEnd - the loop latch terminator.
18///
19//===----------------------------------------------------------------------===//
20
21#include "ARM.h"
22#include "ARMBaseInstrInfo.h"
23#include "ARMBaseRegisterInfo.h"
24#include "ARMBasicBlockInfo.h"
25#include "ARMSubtarget.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineLoopInfo.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29
30using namespace llvm;
31
32#define DEBUG_TYPE "arm-low-overhead-loops"
33#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
34
35namespace {
36
37 class ARMLowOverheadLoops : public MachineFunctionPass {
38 const ARMBaseInstrInfo *TII = nullptr;
39 MachineRegisterInfo *MRI = nullptr;
40 std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
41
42 public:
43 static char ID;
44
45 ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
46
47 void getAnalysisUsage(AnalysisUsage &AU) const override {
48 AU.setPreservesCFG();
49 AU.addRequired<MachineLoopInfo>();
50 MachineFunctionPass::getAnalysisUsage(AU);
51 }
52
53 bool runOnMachineFunction(MachineFunction &MF) override;
54
55 bool ProcessLoop(MachineLoop *ML);
56
Sam Parker4379a402019-07-22 14:16:40 +000057 bool RevertNonLoops(MachineFunction &MF);
58
Sam Parker775b2f52019-07-10 12:29:43 +000059 void RevertWhile(MachineInstr *MI) const;
60
61 void RevertLoopDec(MachineInstr *MI) const;
62
63 void RevertLoopEnd(MachineInstr *MI) const;
64
Sam Parkera6fd9192019-06-25 10:45:51 +000065 void Expand(MachineLoop *ML, MachineInstr *Start,
66 MachineInstr *Dec, MachineInstr *End, bool Revert);
67
68 MachineFunctionProperties getRequiredProperties() const override {
69 return MachineFunctionProperties().set(
70 MachineFunctionProperties::Property::NoVRegs);
71 }
72
73 StringRef getPassName() const override {
74 return ARM_LOW_OVERHEAD_LOOPS_NAME;
75 }
76 };
77}
78
79char ARMLowOverheadLoops::ID = 0;
80
81INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
82 false, false)
83
84bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
Sam Parkerbcf0eb72019-06-25 15:11:17 +000085 if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
86 return false;
Sam Parkera6fd9192019-06-25 10:45:51 +000087
88 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
89
90 auto &MLI = getAnalysis<MachineLoopInfo>();
91 MRI = &MF.getRegInfo();
92 TII = static_cast<const ARMBaseInstrInfo*>(
93 MF.getSubtarget().getInstrInfo());
94 BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
95 BBUtils->computeAllBlockSizes();
Sam Parker08b4a8d2019-07-11 09:56:15 +000096 BBUtils->adjustBBOffsetsAfter(&MF.front());
Sam Parkera6fd9192019-06-25 10:45:51 +000097
98 bool Changed = false;
99 for (auto ML : MLI) {
100 if (!ML->getParentLoop())
101 Changed |= ProcessLoop(ML);
102 }
Sam Parker4379a402019-07-22 14:16:40 +0000103 Changed |= RevertNonLoops(MF);
Sam Parkera6fd9192019-06-25 10:45:51 +0000104 return Changed;
105}
106
Sam Parker4379a402019-07-22 14:16:40 +0000107static bool IsLoopStart(MachineInstr &MI) {
108 return MI.getOpcode() == ARM::t2DoLoopStart ||
109 MI.getOpcode() == ARM::t2WhileLoopStart;
110}
111
Sam Parkera6fd9192019-06-25 10:45:51 +0000112bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
113
114 bool Changed = false;
115
116 // Process inner loops first.
117 for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
118 Changed |= ProcessLoop(*I);
119
120 LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
121
Sam Parker98722692019-07-01 08:21:28 +0000122 // Search the given block for a loop start instruction. If one isn't found,
123 // and there's only one predecessor block, search that one too.
124 std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
Sam Parker4379a402019-07-22 14:16:40 +0000125 [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
Sam Parkera6fd9192019-06-25 10:45:51 +0000126 for (auto &MI : *MBB) {
127 if (IsLoopStart(MI))
128 return &MI;
129 }
Sam Parker98722692019-07-01 08:21:28 +0000130 if (MBB->pred_size() == 1)
131 return SearchForStart(*MBB->pred_begin());
Sam Parkera6fd9192019-06-25 10:45:51 +0000132 return nullptr;
133 };
134
135 MachineInstr *Start = nullptr;
136 MachineInstr *Dec = nullptr;
137 MachineInstr *End = nullptr;
138 bool Revert = false;
139
Sam Parker98722692019-07-01 08:21:28 +0000140 // Search the preheader for the start intrinsic, or look through the
141 // predecessors of the header to find exactly one set.iterations intrinsic.
142 // FIXME: I don't see why we shouldn't be supporting multiple predecessors
143 // with potentially multiple set.loop.iterations, so we need to enable this.
144 if (auto *Preheader = ML->getLoopPreheader()) {
Sam Parkera6fd9192019-06-25 10:45:51 +0000145 Start = SearchForStart(Preheader);
Sam Parker98722692019-07-01 08:21:28 +0000146 } else {
147 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
148 << " - Performing manual predecessor search.\n");
149 MachineBasicBlock *Pred = nullptr;
150 for (auto *MBB : ML->getHeader()->predecessors()) {
151 if (!ML->contains(MBB)) {
152 if (Pred) {
153 LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
154 Start = nullptr;
155 break;
156 }
157 Pred = MBB;
158 Start = SearchForStart(MBB);
159 }
160 }
161 }
Sam Parkera6fd9192019-06-25 10:45:51 +0000162
163 // Find the low-overhead loop components and decide whether or not to fall
164 // back to a normal loop.
165 for (auto *MBB : reverse(ML->getBlocks())) {
166 for (auto &MI : *MBB) {
167 if (MI.getOpcode() == ARM::t2LoopDec)
168 Dec = &MI;
169 else if (MI.getOpcode() == ARM::t2LoopEnd)
170 End = &MI;
Sam Parker4379a402019-07-22 14:16:40 +0000171 else if (IsLoopStart(MI))
172 Start = &MI;
Sam Parkerbcf0eb72019-06-25 15:11:17 +0000173 else if (MI.getDesc().isCall())
174 // TODO: Though the call will require LE to execute again, does this
175 // mean we should revert? Always executing LE hopefully should be
176 // faster than performing a sub,cmp,br or even subs,br.
177 Revert = true;
Sam Parkera6fd9192019-06-25 10:45:51 +0000178
179 if (!Dec)
180 continue;
181
Sam Parkera6fd9192019-06-25 10:45:51 +0000182 // If we find that we load/store LR between LoopDec and LoopEnd, expect
183 // that the decremented value has been spilled to the stack. Because
184 // this value isn't actually going to be produced until the latch, by LE,
185 // we would need to generate a real sub. The value is also likely to be
186 // reloaded for use of LoopEnd - in which in case we'd need to perform
187 // an add because it gets negated again by LE! The other option is to
188 // then generate the other form of LE which doesn't perform the sub.
189 if (MI.mayLoad() || MI.mayStore())
190 Revert =
191 MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR;
192 }
193
194 if (Dec && End && Revert)
195 break;
196 }
197
Sam Parker4379a402019-07-22 14:16:40 +0000198 LLVM_DEBUG(if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
199 if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
200 if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;);
201
Sam Parker98722692019-07-01 08:21:28 +0000202 if (!Start && !Dec && !End) {
Sam Parkera6fd9192019-06-25 10:45:51 +0000203 LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
204 return Changed;
Sam Parker4379a402019-07-22 14:16:40 +0000205 } else if (!(Start && Dec && End)) {
206 LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n");
207 return false;
Sam Parkera6fd9192019-06-25 10:45:51 +0000208 }
209
210 if (!End->getOperand(1).isMBB() ||
211 End->getOperand(1).getMBB() != ML->getHeader())
212 report_fatal_error("Expected LoopEnd to target Loop Header");
213
Sam Parker08b4a8d2019-07-11 09:56:15 +0000214 // The WLS and LE instructions have 12-bits for the label offset. WLS
215 // requires a positive offset, while LE uses negative.
216 if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML->getHeader()) ||
217 !BBUtils->isBBInRange(End, ML->getHeader(), 4094)) {
218 LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
219 Revert = true;
220 }
221 if (Start->getOpcode() == ARM::t2WhileLoopStart &&
222 (BBUtils->getOffsetOf(Start) >
223 BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
224 !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
225 LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
Sam Parkera6fd9192019-06-25 10:45:51 +0000226 Revert = true;
227 }
228
Sam Parkera6fd9192019-06-25 10:45:51 +0000229 Expand(ML, Start, Dec, End, Revert);
230 return true;
231}
232
Sam Parker775b2f52019-07-10 12:29:43 +0000233// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
234// beq that branches to the exit branch.
235// FIXME: Need to check that we're not trashing the CPSR when generating the
236// cmp. We could also try to generate a cbz if the value in LR is also in
237// another low register.
238void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
239 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
240 MachineBasicBlock *MBB = MI->getParent();
241 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
242 TII->get(ARM::t2CMPri));
243 MIB.addReg(ARM::LR);
244 MIB.addImm(0);
245 MIB.addImm(ARMCC::AL);
246 MIB.addReg(ARM::CPSR);
247
248 // TODO: Try to use tBcc instead
249 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
250 MIB.add(MI->getOperand(1)); // branch target
251 MIB.addImm(ARMCC::EQ); // condition code
252 MIB.addReg(ARM::CPSR);
253 MI->eraseFromParent();
254}
255
256// TODO: Check flags so that we can possibly generate a tSubs or tSub.
257void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
258 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
259 MachineBasicBlock *MBB = MI->getParent();
260 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
261 TII->get(ARM::t2SUBri));
262 MIB.addDef(ARM::LR);
263 MIB.add(MI->getOperand(1));
264 MIB.add(MI->getOperand(2));
265 MIB.addImm(ARMCC::AL);
266 MIB.addReg(0);
267 MIB.addReg(0);
268 MI->eraseFromParent();
269}
270
271// Generate a subs, or sub and cmp, and a branch instead of an LE.
272// FIXME: Need to check that we're not trashing the CPSR when generating
273// the cmp.
274void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const {
275 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
276
277 // Create cmp
278 MachineBasicBlock *MBB = MI->getParent();
279 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
280 TII->get(ARM::t2CMPri));
281 MIB.addReg(ARM::LR);
282 MIB.addImm(0);
283 MIB.addImm(ARMCC::AL);
284 MIB.addReg(ARM::CPSR);
285
286 // TODO Try to use tBcc instead.
287 // Create bne
288 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
289 MIB.add(MI->getOperand(1)); // branch target
290 MIB.addImm(ARMCC::NE); // condition code
291 MIB.addReg(ARM::CPSR);
292 MI->eraseFromParent();
293}
294
Sam Parkera6fd9192019-06-25 10:45:51 +0000295void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
296 MachineInstr *Dec, MachineInstr *End,
297 bool Revert) {
298
299 auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
300 // The trip count should already been held in LR since the instructions
301 // within the loop can only read and write to LR. So, there should be a
302 // mov to setup the count. WLS/DLS perform this move, so find the original
303 // and delete it - inserting WLS/DLS in its place.
304 MachineBasicBlock *MBB = Start->getParent();
305 MachineInstr *InsertPt = Start;
306 for (auto &I : MRI->def_instructions(ARM::LR)) {
307 if (I.getParent() != MBB)
308 continue;
309
310 // Always execute.
311 if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL)
312 continue;
313
314 // Only handle move reg, if the trip count it will need moving into a reg
315 // before the setup instruction anyway.
316 if (!I.getDesc().isMoveReg() ||
317 !I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
318 continue;
319 InsertPt = &I;
320 break;
321 }
322
Sam Parker98722692019-07-01 08:21:28 +0000323 unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
324 ARM::t2DLS : ARM::t2WLS;
Sam Parkera6fd9192019-06-25 10:45:51 +0000325 MachineInstrBuilder MIB =
Sam Parker98722692019-07-01 08:21:28 +0000326 BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
Sam Parkera6fd9192019-06-25 10:45:51 +0000327
328 MIB.addDef(ARM::LR);
329 MIB.add(Start->getOperand(0));
Sam Parker98722692019-07-01 08:21:28 +0000330 if (Opc == ARM::t2WLS)
331 MIB.add(Start->getOperand(1));
332
333 if (InsertPt != Start)
334 InsertPt->eraseFromParent();
Sam Parkera6fd9192019-06-25 10:45:51 +0000335 Start->eraseFromParent();
Sam Parker98722692019-07-01 08:21:28 +0000336 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
337 return &*MIB;
Sam Parkera6fd9192019-06-25 10:45:51 +0000338 };
339
340 // Combine the LoopDec and LoopEnd instructions into LE(TP).
341 auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec,
342 MachineInstr *End) {
343 MachineBasicBlock *MBB = End->getParent();
344 MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
345 TII->get(ARM::t2LEUpdate));
346 MIB.addDef(ARM::LR);
347 MIB.add(End->getOperand(0));
348 MIB.add(End->getOperand(1));
349 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
350
Sam Parkera6fd9192019-06-25 10:45:51 +0000351 End->eraseFromParent();
352 Dec->eraseFromParent();
Sam Parker98722692019-07-01 08:21:28 +0000353 return &*MIB;
Sam Parkera6fd9192019-06-25 10:45:51 +0000354 };
355
Sam Parker98722692019-07-01 08:21:28 +0000356 // TODO: We should be able to automatically remove these branches before we
357 // get here - probably by teaching analyzeBranch about the pseudo
358 // instructions.
359 // If there is an unconditional branch, after I, that just branches to the
360 // next block, remove it.
361 auto RemoveDeadBranch = [](MachineInstr *I) {
362 MachineBasicBlock *BB = I->getParent();
363 MachineInstr *Terminator = &BB->instr_back();
364 if (Terminator->isUnconditionalBranch() && I != Terminator) {
365 MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB();
366 if (BB->isLayoutSuccessor(Succ)) {
367 LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);
368 Terminator->eraseFromParent();
369 }
370 }
371 };
372
Sam Parkera6fd9192019-06-25 10:45:51 +0000373 if (Revert) {
Sam Parker98722692019-07-01 08:21:28 +0000374 if (Start->getOpcode() == ARM::t2WhileLoopStart)
Sam Parker775b2f52019-07-10 12:29:43 +0000375 RevertWhile(Start);
376 else
377 Start->eraseFromParent();
378 RevertLoopDec(Dec);
379 RevertLoopEnd(End);
Sam Parkera6fd9192019-06-25 10:45:51 +0000380 } else {
Sam Parker98722692019-07-01 08:21:28 +0000381 Start = ExpandLoopStart(ML, Start);
382 RemoveDeadBranch(Start);
383 End = ExpandLoopEnd(ML, Dec, End);
384 RemoveDeadBranch(End);
Sam Parkera6fd9192019-06-25 10:45:51 +0000385 }
386}
387
Sam Parker4379a402019-07-22 14:16:40 +0000388bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction &MF) {
389 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
390 bool Changed = false;
391
392 for (auto &MBB : MF) {
393 SmallVector<MachineInstr*, 4> Starts;
394 SmallVector<MachineInstr*, 4> Decs;
395 SmallVector<MachineInstr*, 4> Ends;
396
397 for (auto &I : MBB) {
398 if (IsLoopStart(I))
399 Starts.push_back(&I);
400 else if (I.getOpcode() == ARM::t2LoopDec)
401 Decs.push_back(&I);
402 else if (I.getOpcode() == ARM::t2LoopEnd)
403 Ends.push_back(&I);
404 }
405
406 if (Starts.empty() && Decs.empty() && Ends.empty())
407 continue;
408
409 Changed = true;
410
411 for (auto *Start : Starts) {
412 if (Start->getOpcode() == ARM::t2WhileLoopStart)
413 RevertWhile(Start);
414 else
415 Start->eraseFromParent();
416 }
417 for (auto *Dec : Decs)
418 RevertLoopDec(Dec);
419
420 for (auto *End : Ends)
421 RevertLoopEnd(End);
422 }
423 return Changed;
424}
425
Sam Parkera6fd9192019-06-25 10:45:51 +0000426FunctionPass *llvm::createARMLowOverheadLoopsPass() {
427 return new ARMLowOverheadLoops();
428}