blob: 6a3709dc03ff52e93051ff65623cf0f9853d213d [file] [log] [blame]
Sam Parkera6fd9192019-06-25 10:45:51 +00001//===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10/// instructions into machine operations.
11/// The expectation is that the loop contains three pseudo instructions:
12/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13/// form should be in the preheader, whereas the while form should be in the
14/// preheaders only predecessor. TODO: Could DoLoopStart get moved into the
15/// pre-preheader?
16/// - t2LoopDec - placed within in the loop body.
17/// - t2LoopEnd - the loop latch terminator.
18///
19//===----------------------------------------------------------------------===//
20
21#include "ARM.h"
22#include "ARMBaseInstrInfo.h"
23#include "ARMBaseRegisterInfo.h"
24#include "ARMBasicBlockInfo.h"
25#include "ARMSubtarget.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineLoopInfo.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29
30using namespace llvm;
31
32#define DEBUG_TYPE "arm-low-overhead-loops"
33#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
34
35namespace {
36
37 class ARMLowOverheadLoops : public MachineFunctionPass {
38 const ARMBaseInstrInfo *TII = nullptr;
39 MachineRegisterInfo *MRI = nullptr;
40 std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
41
42 public:
43 static char ID;
44
45 ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
46
47 void getAnalysisUsage(AnalysisUsage &AU) const override {
48 AU.setPreservesCFG();
49 AU.addRequired<MachineLoopInfo>();
50 MachineFunctionPass::getAnalysisUsage(AU);
51 }
52
53 bool runOnMachineFunction(MachineFunction &MF) override;
54
55 bool ProcessLoop(MachineLoop *ML);
56
57 void Expand(MachineLoop *ML, MachineInstr *Start,
58 MachineInstr *Dec, MachineInstr *End, bool Revert);
59
60 MachineFunctionProperties getRequiredProperties() const override {
61 return MachineFunctionProperties().set(
62 MachineFunctionProperties::Property::NoVRegs);
63 }
64
65 StringRef getPassName() const override {
66 return ARM_LOW_OVERHEAD_LOOPS_NAME;
67 }
68 };
69}
70
71char ARMLowOverheadLoops::ID = 0;
72
73INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
74 false, false)
75
76bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
Sam Parkerbcf0eb72019-06-25 15:11:17 +000077 if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
78 return false;
Sam Parkera6fd9192019-06-25 10:45:51 +000079
80 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
81
82 auto &MLI = getAnalysis<MachineLoopInfo>();
83 MRI = &MF.getRegInfo();
84 TII = static_cast<const ARMBaseInstrInfo*>(
85 MF.getSubtarget().getInstrInfo());
86 BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
87 BBUtils->computeAllBlockSizes();
88
89 bool Changed = false;
90 for (auto ML : MLI) {
91 if (!ML->getParentLoop())
92 Changed |= ProcessLoop(ML);
93 }
94 return Changed;
95}
96
97bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
98
99 bool Changed = false;
100
101 // Process inner loops first.
102 for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
103 Changed |= ProcessLoop(*I);
104
105 LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
106
107 auto IsLoopStart = [](MachineInstr &MI) {
108 return MI.getOpcode() == ARM::t2DoLoopStart;
109 };
110
111 auto SearchForStart =
112 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr* {
113 for (auto &MI : *MBB) {
114 if (IsLoopStart(MI))
115 return &MI;
116 }
117 return nullptr;
118 };
119
120 MachineInstr *Start = nullptr;
121 MachineInstr *Dec = nullptr;
122 MachineInstr *End = nullptr;
123 bool Revert = false;
124
125 if (auto *Preheader = ML->getLoopPreheader())
126 Start = SearchForStart(Preheader);
127
128 // Find the low-overhead loop components and decide whether or not to fall
129 // back to a normal loop.
130 for (auto *MBB : reverse(ML->getBlocks())) {
131 for (auto &MI : *MBB) {
132 if (MI.getOpcode() == ARM::t2LoopDec)
133 Dec = &MI;
134 else if (MI.getOpcode() == ARM::t2LoopEnd)
135 End = &MI;
Sam Parkerbcf0eb72019-06-25 15:11:17 +0000136 else if (MI.getDesc().isCall())
137 // TODO: Though the call will require LE to execute again, does this
138 // mean we should revert? Always executing LE hopefully should be
139 // faster than performing a sub,cmp,br or even subs,br.
140 Revert = true;
Sam Parkera6fd9192019-06-25 10:45:51 +0000141
142 if (!Dec)
143 continue;
144
Sam Parkera6fd9192019-06-25 10:45:51 +0000145 // If we find that we load/store LR between LoopDec and LoopEnd, expect
146 // that the decremented value has been spilled to the stack. Because
147 // this value isn't actually going to be produced until the latch, by LE,
148 // we would need to generate a real sub. The value is also likely to be
149 // reloaded for use of LoopEnd - in which in case we'd need to perform
150 // an add because it gets negated again by LE! The other option is to
151 // then generate the other form of LE which doesn't perform the sub.
152 if (MI.mayLoad() || MI.mayStore())
153 Revert =
154 MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR;
155 }
156
157 if (Dec && End && Revert)
158 break;
159 }
160
161 if (Start || Dec || End) {
162 if (!Start || !Dec || !End)
163 report_fatal_error("Failed to find all loop components");
164 } else {
165 LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
166 return Changed;
167 }
168
169 if (!End->getOperand(1).isMBB() ||
170 End->getOperand(1).getMBB() != ML->getHeader())
171 report_fatal_error("Expected LoopEnd to target Loop Header");
172
173 // The LE instructions has 12-bits for the label offset.
174 if (!BBUtils->isBBInRange(End, ML->getHeader(), 4096)) {
175 LLVM_DEBUG(dbgs() << "ARM Loops: Too large for a low-overhead loop!\n");
176 Revert = true;
177 }
178
179 LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start
180 << " - Found Loop Dec: " << *Dec
181 << " - Found Loop End: " << *End);
182
183 Expand(ML, Start, Dec, End, Revert);
184 return true;
185}
186
187void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
188 MachineInstr *Dec, MachineInstr *End,
189 bool Revert) {
190
191 auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
192 // The trip count should already been held in LR since the instructions
193 // within the loop can only read and write to LR. So, there should be a
194 // mov to setup the count. WLS/DLS perform this move, so find the original
195 // and delete it - inserting WLS/DLS in its place.
196 MachineBasicBlock *MBB = Start->getParent();
197 MachineInstr *InsertPt = Start;
198 for (auto &I : MRI->def_instructions(ARM::LR)) {
199 if (I.getParent() != MBB)
200 continue;
201
202 // Always execute.
203 if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL)
204 continue;
205
206 // Only handle move reg, if the trip count it will need moving into a reg
207 // before the setup instruction anyway.
208 if (!I.getDesc().isMoveReg() ||
209 !I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
210 continue;
211 InsertPt = &I;
212 break;
213 }
214
215 MachineInstrBuilder MIB =
216 BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(ARM::t2DLS));
217 if (InsertPt != Start)
218 InsertPt->eraseFromParent();
219
220 MIB.addDef(ARM::LR);
221 MIB.add(Start->getOperand(0));
222 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted DLS: " << *MIB);
223 Start->eraseFromParent();
224 };
225
226 // Combine the LoopDec and LoopEnd instructions into LE(TP).
227 auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec,
228 MachineInstr *End) {
229 MachineBasicBlock *MBB = End->getParent();
230 MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
231 TII->get(ARM::t2LEUpdate));
232 MIB.addDef(ARM::LR);
233 MIB.add(End->getOperand(0));
234 MIB.add(End->getOperand(1));
235 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
236
237 // If there is a branch after loop end, which branches to the fallthrough
238 // block, remove the branch.
239 MachineBasicBlock *Latch = End->getParent();
240 MachineInstr *Terminator = &Latch->instr_back();
241 if (End != Terminator) {
242 MachineBasicBlock *Exit = ML->getExitBlock();
243 if (Latch->isLayoutSuccessor(Exit)) {
244 LLVM_DEBUG(dbgs() << "ARM Loops: Removing loop exit branch: "
245 << *Terminator);
246 Terminator->eraseFromParent();
247 }
248 }
249 End->eraseFromParent();
250 Dec->eraseFromParent();
251 };
252
253 // Generate a subs, or sub and cmp, and a branch instead of an LE.
254 // TODO: Check flags so that we can possibly generate a subs.
255 auto ExpandBranch = [this](MachineInstr *Dec, MachineInstr *End) {
256 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub, cmp, br.\n");
257 // Create sub
258 MachineBasicBlock *MBB = Dec->getParent();
259 MachineInstrBuilder MIB = BuildMI(*MBB, Dec, Dec->getDebugLoc(),
260 TII->get(ARM::t2SUBri));
261 MIB.addDef(ARM::LR);
262 MIB.add(Dec->getOperand(1));
263 MIB.add(Dec->getOperand(2));
264 MIB.addImm(ARMCC::AL);
265 MIB.addReg(0);
266 MIB.addReg(0);
267
268 // Create cmp
269 MBB = End->getParent();
270 MIB = BuildMI(*MBB, End, End->getDebugLoc(), TII->get(ARM::t2CMPri));
271 MIB.addReg(ARM::LR);
272 MIB.addImm(0);
273 MIB.addImm(ARMCC::AL);
Sam Parkerbcf0eb72019-06-25 15:11:17 +0000274 MIB.addReg(ARM::CPSR);
Sam Parkera6fd9192019-06-25 10:45:51 +0000275
276 // Create bne
277 MIB = BuildMI(*MBB, End, End->getDebugLoc(), TII->get(ARM::t2Bcc));
278 MIB.add(End->getOperand(1)); // branch target
279 MIB.addImm(ARMCC::NE); // condition code
Sam Parkerbcf0eb72019-06-25 15:11:17 +0000280 MIB.addReg(ARM::CPSR);
Sam Parkera6fd9192019-06-25 10:45:51 +0000281 End->eraseFromParent();
282 Dec->eraseFromParent();
283 };
284
285 if (Revert) {
286 Start->eraseFromParent();
287 ExpandBranch(Dec, End);
288 } else {
289 ExpandLoopStart(ML, Start);
290 ExpandLoopEnd(ML, Dec, End);
291 }
292}
293
294FunctionPass *llvm::createARMLowOverheadLoopsPass() {
295 return new ARMLowOverheadLoops();
296}