blob: b02b8d93b5b5030e1f4b054df516626c998503c0 [file] [log] [blame]
Tim Northover3b0846e2014-05-24 12:50:23 +00001//===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a pass that collect the Linker Optimization Hint (LOH).
11// This pass should be run at the very end of the compilation flow, just before
12// assembly printer.
13// To be useful for the linker, the LOH must be printed into the assembly file.
14//
15// A LOH describes a sequence of instructions that may be optimized by the
16// linker.
17// This same sequence cannot be optimized by the compiler because some of
18// the information will be known at link time.
19// For instance, consider the following sequence:
20// L1: adrp xA, sym@PAGE
21// L2: add xB, xA, sym@PAGEOFF
22// L3: ldr xC, [xB, #imm]
23// This sequence can be turned into:
24// A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB:
25// L3: ldr xC, sym+#imm
26// It may also be turned into either the following more efficient
27// code sequences:
28// - If sym@PAGEOFF + #imm fits the encoding space of L3.
29// L1: adrp xA, sym@PAGE
30// L3: ldr xC, [xB, sym@PAGEOFF + #imm]
31// - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB:
32// L1: adr xA, sym
33// L3: ldr xC, [xB, #imm]
34//
35// To be valid a LOH must meet all the requirements needed by all the related
36// possible linker transformations.
37// For instance, using the running example, the constraints to emit
38// ".loh AdrpAddLdr" are:
39// - L1, L2, and L3 instructions are of the expected type, i.e.,
40// respectively ADRP, ADD (immediate), and LD.
41// - The result of L1 is used only by L2.
42// - The register argument (xA) used in the ADD instruction is defined
43// only by L1.
44// - The result of L2 is used only by L3.
45// - The base address (xB) in L3 is defined only L2.
46// - The ADRP in L1 and the ADD in L2 must reference the same symbol using
47// @PAGE/@PAGEOFF with no additional constants
48//
49// Currently supported LOHs are:
50// * So called non-ADRP-related:
51// - .loh AdrpAddLdr L1, L2, L3:
52// L1: adrp xA, sym@PAGE
53// L2: add xB, xA, sym@PAGEOFF
54// L3: ldr xC, [xB, #imm]
55// - .loh AdrpLdrGotLdr L1, L2, L3:
56// L1: adrp xA, sym@GOTPAGE
57// L2: ldr xB, [xA, sym@GOTPAGEOFF]
58// L3: ldr xC, [xB, #imm]
59// - .loh AdrpLdr L1, L3:
60// L1: adrp xA, sym@PAGE
61// L3: ldr xC, [xA, sym@PAGEOFF]
62// - .loh AdrpAddStr L1, L2, L3:
63// L1: adrp xA, sym@PAGE
64// L2: add xB, xA, sym@PAGEOFF
65// L3: str xC, [xB, #imm]
66// - .loh AdrpLdrGotStr L1, L2, L3:
67// L1: adrp xA, sym@GOTPAGE
68// L2: ldr xB, [xA, sym@GOTPAGEOFF]
69// L3: str xC, [xB, #imm]
70// - .loh AdrpAdd L1, L2:
71// L1: adrp xA, sym@PAGE
72// L2: add xB, xA, sym@PAGEOFF
73// For all these LOHs, L1, L2, L3 form a simple chain:
74// L1 result is used only by L2 and L2 result by L3.
75// L3 LOH-related argument is defined only by L2 and L2 LOH-related argument
76// by L1.
77// All these LOHs aim at using more efficient load/store patterns by folding
78// some instructions used to compute the address directly into the load/store.
79//
80// * So called ADRP-related:
81// - .loh AdrpAdrp L2, L1:
82// L2: ADRP xA, sym1@PAGE
83// L1: ADRP xA, sym2@PAGE
84// L2 dominates L1 and xA is not redifined between L2 and L1
85// This LOH aims at getting rid of redundant ADRP instructions.
86//
87// The overall design for emitting the LOHs is:
88// 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo.
89// 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:
90// 1. Associates them a label.
91// 2. Emits them in a MCStreamer (EmitLOHDirective).
92// - The MCMachOStreamer records them into the MCAssembler.
93// - The MCAsmStreamer prints them.
94// - Other MCStreamers ignore them.
95// 3. Closes the MCStreamer:
96// - The MachObjectWriter gets them from the MCAssembler and writes
97// them in the object file.
98// - Other ObjectWriters ignore them.
99//===----------------------------------------------------------------------===//
100
101#include "AArch64.h"
102#include "AArch64InstrInfo.h"
103#include "AArch64MachineFunctionInfo.h"
Eric Christopherd9134482014-08-04 21:25:23 +0000104#include "AArch64Subtarget.h"
Tim Northover3b0846e2014-05-24 12:50:23 +0000105#include "MCTargetDesc/AArch64AddressingModes.h"
106#include "llvm/ADT/BitVector.h"
107#include "llvm/ADT/DenseMap.h"
108#include "llvm/ADT/MapVector.h"
109#include "llvm/ADT/SetVector.h"
110#include "llvm/ADT/SmallVector.h"
Benjamin Kramer1f8930e2014-07-25 11:42:14 +0000111#include "llvm/ADT/Statistic.h"
Tim Northover3b0846e2014-05-24 12:50:23 +0000112#include "llvm/CodeGen/MachineBasicBlock.h"
113#include "llvm/CodeGen/MachineDominators.h"
114#include "llvm/CodeGen/MachineFunctionPass.h"
115#include "llvm/CodeGen/MachineInstr.h"
116#include "llvm/CodeGen/MachineInstrBuilder.h"
Tim Northover3b0846e2014-05-24 12:50:23 +0000117#include "llvm/Support/CommandLine.h"
118#include "llvm/Support/Debug.h"
119#include "llvm/Support/ErrorHandling.h"
120#include "llvm/Support/raw_ostream.h"
Benjamin Kramer1f8930e2014-07-25 11:42:14 +0000121#include "llvm/Target/TargetInstrInfo.h"
122#include "llvm/Target/TargetMachine.h"
123#include "llvm/Target/TargetRegisterInfo.h"
Tim Northover3b0846e2014-05-24 12:50:23 +0000124using namespace llvm;
125
126#define DEBUG_TYPE "aarch64-collect-loh"
127
128static cl::opt<bool>
129PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden,
130 cl::desc("Restrict analysis to registers invovled"
131 " in LOHs"),
132 cl::init(true));
133
134static cl::opt<bool>
135BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden,
136 cl::desc("Restrict analysis at basic block scope"),
137 cl::init(true));
138
139STATISTIC(NumADRPSimpleCandidate,
140 "Number of simplifiable ADRP dominate by another");
James Y Knightd7d9e102016-08-30 03:16:16 +0000141#ifndef NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000142STATISTIC(NumADRPComplexCandidate2,
143 "Number of simplifiable ADRP reachable by 2 defs");
144STATISTIC(NumADRPComplexCandidate3,
145 "Number of simplifiable ADRP reachable by 3 defs");
146STATISTIC(NumADRPComplexCandidateOther,
147 "Number of simplifiable ADRP reachable by 4 or more defs");
148STATISTIC(NumADDToSTRWithImm,
149 "Number of simplifiable STR with imm reachable by ADD");
150STATISTIC(NumLDRToSTRWithImm,
151 "Number of simplifiable STR with imm reachable by LDR");
152STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
153STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
154STATISTIC(NumADDToLDRWithImm,
155 "Number of simplifiable LDR with imm reachable by ADD");
156STATISTIC(NumLDRToLDRWithImm,
157 "Number of simplifiable LDR with imm reachable by LDR");
158STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
159STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
James Y Knightd7d9e102016-08-30 03:16:16 +0000160#endif // NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000161STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
James Y Knightd7d9e102016-08-30 03:16:16 +0000162#ifndef NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000163STATISTIC(NumCplxLvl1, "Number of complex case of level 1");
164STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1");
165STATISTIC(NumCplxLvl2, "Number of complex case of level 2");
166STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2");
James Y Knightd7d9e102016-08-30 03:16:16 +0000167#endif // NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000168STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
169STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
170
Chad Rosier084b7862015-08-05 14:48:44 +0000171#define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
172
Tim Northover3b0846e2014-05-24 12:50:23 +0000173namespace {
174struct AArch64CollectLOH : public MachineFunctionPass {
175 static char ID;
176 AArch64CollectLOH() : MachineFunctionPass(ID) {
177 initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry());
178 }
179
180 bool runOnMachineFunction(MachineFunction &MF) override;
181
Derek Schuff1dbf7a52016-04-04 17:09:25 +0000182 MachineFunctionProperties getRequiredProperties() const override {
183 return MachineFunctionProperties().set(
Matthias Braun1eb47362016-08-25 01:27:13 +0000184 MachineFunctionProperties::Property::NoVRegs);
Derek Schuff1dbf7a52016-04-04 17:09:25 +0000185 }
186
Tim Northover3b0846e2014-05-24 12:50:23 +0000187 const char *getPassName() const override {
Chad Rosier084b7862015-08-05 14:48:44 +0000188 return AARCH64_COLLECT_LOH_NAME;
Tim Northover3b0846e2014-05-24 12:50:23 +0000189 }
190
191 void getAnalysisUsage(AnalysisUsage &AU) const override {
192 AU.setPreservesAll();
193 MachineFunctionPass::getAnalysisUsage(AU);
194 AU.addRequired<MachineDominatorTree>();
195 }
196
197private:
198};
199
200/// A set of MachineInstruction.
201typedef SetVector<const MachineInstr *> SetOfMachineInstr;
202/// Map a basic block to a set of instructions per register.
203/// This is used to represent the exposed uses of a basic block
204/// per register.
Dylan Noblesmithb8994642014-08-25 01:59:38 +0000205typedef MapVector<const MachineBasicBlock *,
206 std::unique_ptr<SetOfMachineInstr[]>>
Tim Northover3b0846e2014-05-24 12:50:23 +0000207BlockToSetOfInstrsPerColor;
208/// Map a basic block to an instruction per register.
209/// This is used to represent the live-out definitions of a basic block
210/// per register.
Dylan Noblesmithb8994642014-08-25 01:59:38 +0000211typedef MapVector<const MachineBasicBlock *,
212 std::unique_ptr<const MachineInstr *[]>>
Tim Northover3b0846e2014-05-24 12:50:23 +0000213BlockToInstrPerColor;
214/// Map an instruction to a set of instructions. Used to represent the
215/// mapping def to reachable uses or use to definitions.
216typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs;
217/// Map a basic block to a BitVector.
218/// This is used to record the kill registers per basic block.
219typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet;
220
221/// Map a register to a dense id.
222typedef DenseMap<unsigned, unsigned> MapRegToId;
223/// Map a dense id to a register. Used for debug purposes.
224typedef SmallVector<unsigned, 32> MapIdToReg;
225} // end anonymous namespace.
226
227char AArch64CollectLOH::ID = 0;
228
229INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
Chad Rosier084b7862015-08-05 14:48:44 +0000230 AARCH64_COLLECT_LOH_NAME, false, false)
Tim Northover3b0846e2014-05-24 12:50:23 +0000231INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
232INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
Chad Rosier084b7862015-08-05 14:48:44 +0000233 AARCH64_COLLECT_LOH_NAME, false, false)
Tim Northover3b0846e2014-05-24 12:50:23 +0000234
235/// Given a couple (MBB, reg) get the corresponding set of instruction from
236/// the given "sets".
237/// If this couple does not reference any set, an empty set is added to "sets"
238/// for this couple and returned.
239/// \param nbRegs is used internally allocate some memory. It must be consistent
240/// with the way sets is used.
241static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
242 const MachineBasicBlock &MBB, unsigned reg,
243 unsigned nbRegs) {
244 SetOfMachineInstr *result;
245 BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
246 if (it != sets.end())
Dylan Noblesmithb8994642014-08-25 01:59:38 +0000247 result = it->second.get();
Tim Northover3b0846e2014-05-24 12:50:23 +0000248 else
Dylan Noblesmithb8994642014-08-25 01:59:38 +0000249 result = (sets[&MBB] = make_unique<SetOfMachineInstr[]>(nbRegs)).get();
Tim Northover3b0846e2014-05-24 12:50:23 +0000250
251 return result[reg];
252}
253
254/// Given a couple (reg, MI) get the corresponding set of instructions from the
255/// the given "sets".
256/// This is used to get the uses record in sets of a definition identified by
257/// MI and reg, i.e., MI defines reg.
258/// If the couple does not reference anything, an empty set is added to
259/// "sets[reg]".
260/// \pre set[reg] is valid.
261static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
262 const MachineInstr &MI) {
263 return sets[reg][&MI];
264}
265
266/// Same as getUses but does not modify the input map: sets.
267/// \return NULL if the couple (reg, MI) is not in sets.
268static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
269 const MachineInstr &MI) {
270 InstrToInstrs::const_iterator Res = sets[reg].find(&MI);
271 if (Res != sets[reg].end())
272 return &(Res->second);
273 return nullptr;
274}
275
276/// Initialize the reaching definition algorithm:
277/// For each basic block BB in MF, record:
278/// - its kill set.
279/// - its reachable uses (uses that are exposed to BB's predecessors).
280/// - its the generated definitions.
281/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to
282/// the list of uses of exposed defintions.
283/// \param ADRPMode specifies to only consider ADRP instructions for generated
284/// definition. It also consider definitions of ADRP instructions as uses and
285/// ignore other uses. The ADRPMode is used to collect the information for LHO
286/// that involve ADRP operation only.
Pete Cooperd987cd22015-03-11 21:40:25 +0000287static void initReachingDef(const MachineFunction &MF,
Tim Northover3b0846e2014-05-24 12:50:23 +0000288 InstrToInstrs *ColorOpToReachedUses,
289 BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
290 BlockToSetOfInstrsPerColor &ReachableUses,
291 const MapRegToId &RegToId,
292 const MachineInstr *DummyOp, bool ADRPMode) {
Eric Christopher6c901622015-01-28 03:51:33 +0000293 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +0000294 unsigned NbReg = RegToId.size();
295
Pete Cooperd987cd22015-03-11 21:40:25 +0000296 for (const MachineBasicBlock &MBB : MF) {
Dylan Noblesmithb8994642014-08-25 01:59:38 +0000297 auto &BBGen = Gen[&MBB];
298 BBGen = make_unique<const MachineInstr *[]>(NbReg);
Dylan Noblesmith4af4d2c2014-08-26 03:33:26 +0000299 std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
Tim Northover3b0846e2014-05-24 12:50:23 +0000300
301 BitVector &BBKillSet = Kill[&MBB];
302 BBKillSet.resize(NbReg);
303 for (const MachineInstr &MI : MBB) {
304 bool IsADRP = MI.getOpcode() == AArch64::ADRP;
305
306 // Process uses first.
307 if (IsADRP || !ADRPMode)
308 for (const MachineOperand &MO : MI.operands()) {
309 // Treat ADRP def as use, as the goal of the analysis is to find
310 // ADRP defs reached by other ADRP defs.
311 if (!MO.isReg() || (!ADRPMode && !MO.isUse()) ||
312 (ADRPMode && (!IsADRP || !MO.isDef())))
313 continue;
314 unsigned CurReg = MO.getReg();
315 MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
316 if (ItCurRegId == RegToId.end())
317 continue;
318 CurReg = ItCurRegId->second;
319
320 // if CurReg has not been defined, this use is reachable.
321 if (!BBGen[CurReg] && !BBKillSet.test(CurReg))
322 getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI);
323 // current basic block definition for this color, if any, is in Gen.
324 if (BBGen[CurReg])
325 getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI);
326 }
327
328 // Process clobbers.
329 for (const MachineOperand &MO : MI.operands()) {
330 if (!MO.isRegMask())
331 continue;
332 // Clobbers kill the related colors.
333 const uint32_t *PreservedRegs = MO.getRegMask();
334
335 // Set generated regs.
Richard Trieu6b1aa5f2015-04-15 01:21:15 +0000336 for (const auto &Entry : RegToId) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000337 unsigned Reg = Entry.second;
338 // Use the global register ID when querying APIs external to this
339 // pass.
340 if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) {
341 // Do not register clobbered definition for no ADRP.
342 // This definition is not used anyway (otherwise register
343 // allocation is wrong).
344 BBGen[Reg] = ADRPMode ? &MI : nullptr;
345 BBKillSet.set(Reg);
346 }
347 }
348 }
349
350 // Process register defs.
351 for (const MachineOperand &MO : MI.operands()) {
352 if (!MO.isReg() || !MO.isDef())
353 continue;
354 unsigned CurReg = MO.getReg();
355 MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
356 if (ItCurRegId == RegToId.end())
357 continue;
358
359 for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) {
360 MapRegToId::const_iterator ItRegId = RegToId.find(*AI);
Quentin Colombeta80b9c82015-08-31 19:02:00 +0000361 // If this alias has not been recorded, then it is not interesting
362 // for the current analysis.
363 // We can end up in this situation because of tuple registers.
364 // E.g., Let say we are interested in S1. When we register
365 // S1, we will also register its aliases and in particular
366 // the tuple Q1_Q2.
367 // Now, when we encounter Q1_Q2, we will look through its aliases
368 // and will find that S2 is not registered.
369 if (ItRegId == RegToId.end())
370 continue;
371
Tim Northover3b0846e2014-05-24 12:50:23 +0000372 BBKillSet.set(ItRegId->second);
373 BBGen[ItRegId->second] = &MI;
374 }
375 BBGen[ItCurRegId->second] = &MI;
376 }
377 }
378
379 // If we restrict our analysis to basic block scope, conservatively add a
380 // dummy
381 // use for each generated value.
382 if (!ADRPMode && DummyOp && !MBB.succ_empty())
383 for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg)
384 if (BBGen[CurReg])
385 getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp);
386 }
387}
388
389/// Reaching def core algorithm:
390/// while an Out has changed
391/// for each bb
392/// for each color
393/// In[bb][color] = U Out[bb.predecessors][color]
394/// insert reachableUses[bb][color] in each in[bb][color]
395/// op.reachedUses
396///
397/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
Pete Cooperd987cd22015-03-11 21:40:25 +0000398static void reachingDefAlgorithm(const MachineFunction &MF,
Tim Northover3b0846e2014-05-24 12:50:23 +0000399 InstrToInstrs *ColorOpToReachedUses,
400 BlockToSetOfInstrsPerColor &In,
401 BlockToSetOfInstrsPerColor &Out,
402 BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
403 BlockToSetOfInstrsPerColor &ReachableUses,
404 unsigned NbReg) {
405 bool HasChanged;
406 do {
407 HasChanged = false;
Pete Cooperd987cd22015-03-11 21:40:25 +0000408 for (const MachineBasicBlock &MBB : MF) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000409 unsigned CurReg;
410 for (CurReg = 0; CurReg < NbReg; ++CurReg) {
411 SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
412 SetOfMachineInstr &BBReachableUses =
413 getSet(ReachableUses, MBB, CurReg, NbReg);
414 SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
415 unsigned Size = BBOutSet.size();
416 // In[bb][color] = U Out[bb.predecessors][color]
Pete Cooperd987cd22015-03-11 21:40:25 +0000417 for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000418 SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
419 BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
420 }
421 // insert reachableUses[bb][color] in each in[bb][color] op.reachedses
422 for (const MachineInstr *MI : BBInSet) {
423 SetOfMachineInstr &OpReachedUses =
424 getUses(ColorOpToReachedUses, CurReg, *MI);
425 OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end());
426 }
427 // Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
428 if (!Kill[&MBB].test(CurReg))
429 BBOutSet.insert(BBInSet.begin(), BBInSet.end());
430 if (Gen[&MBB][CurReg])
431 BBOutSet.insert(Gen[&MBB][CurReg]);
432 HasChanged |= BBOutSet.size() != Size;
433 }
434 }
435 } while (HasChanged);
436}
437
Tim Northover3b0846e2014-05-24 12:50:23 +0000438/// Reaching definition algorithm.
439/// \param MF function on which the algorithm will operate.
440/// \param[out] ColorOpToReachedUses will contain the result of the reaching
441/// def algorithm.
442/// \param ADRPMode specify whether the reaching def algorithm should be tuned
443/// for ADRP optimization. \see initReachingDef for more details.
444/// \param DummyOp if not NULL, the algorithm will work at
445/// basic block scope and will set for every exposed definition a use to
446/// @p DummyOp.
447/// \pre ColorOpToReachedUses is an array of at least number of registers of
448/// InstrToInstrs.
Pete Cooperd987cd22015-03-11 21:40:25 +0000449static void reachingDef(const MachineFunction &MF,
Tim Northover3b0846e2014-05-24 12:50:23 +0000450 InstrToInstrs *ColorOpToReachedUses,
451 const MapRegToId &RegToId, bool ADRPMode = false,
452 const MachineInstr *DummyOp = nullptr) {
453 // structures:
454 // For each basic block.
455 // Out: a set per color of definitions that reach the
456 // out boundary of this block.
457 // In: Same as Out but for in boundary.
458 // Gen: generated color in this block (one operation per color).
459 // Kill: register set of killed color in this block.
460 // ReachableUses: a set per color of uses (operation) reachable
461 // for "In" definitions.
462 BlockToSetOfInstrsPerColor Out, In, ReachableUses;
463 BlockToInstrPerColor Gen;
464 BlockToRegSet Kill;
465
466 // Initialize Gen, kill and reachableUses.
467 initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId,
468 DummyOp, ADRPMode);
469
470 // Algo.
471 if (!DummyOp)
472 reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
473 ReachableUses, RegToId.size());
Tim Northover3b0846e2014-05-24 12:50:23 +0000474}
475
476#ifndef NDEBUG
477/// print the result of the reaching definition algorithm.
478static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses,
479 unsigned NbReg, const TargetRegisterInfo *TRI,
480 const MapIdToReg &IdToReg) {
481 unsigned CurReg;
482 for (CurReg = 0; CurReg < NbReg; ++CurReg) {
483 if (ColorOpToReachedUses[CurReg].empty())
484 continue;
485 DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n");
486
487 for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
488 DEBUG(dbgs() << "Def:\n");
489 DEBUG(DefsIt.first->print(dbgs()));
490 DEBUG(dbgs() << "Reachable uses:\n");
491 for (const MachineInstr *MI : DefsIt.second) {
492 DEBUG(MI->print(dbgs()));
493 }
494 }
495 }
496}
497#endif // NDEBUG
498
499/// Answer the following question: Can Def be one of the definition
500/// involved in a part of a LOH?
501static bool canDefBePartOfLOH(const MachineInstr *Def) {
502 unsigned Opc = Def->getOpcode();
503 // Accept ADRP, ADDLow and LOADGot.
504 switch (Opc) {
505 default:
506 return false;
507 case AArch64::ADRP:
508 return true;
509 case AArch64::ADDXri:
510 // Check immediate to see if the immediate is an address.
511 switch (Def->getOperand(2).getType()) {
512 default:
513 return false;
514 case MachineOperand::MO_GlobalAddress:
515 case MachineOperand::MO_JumpTableIndex:
516 case MachineOperand::MO_ConstantPoolIndex:
517 case MachineOperand::MO_BlockAddress:
518 return true;
519 }
520 case AArch64::LDRXui:
521 // Check immediate to see if the immediate is an address.
522 switch (Def->getOperand(2).getType()) {
523 default:
524 return false;
525 case MachineOperand::MO_GlobalAddress:
526 return true;
527 }
528 }
529 // Unreachable.
530 return false;
531}
532
533/// Check whether the given instruction can the end of a LOH chain involving a
534/// store.
535static bool isCandidateStore(const MachineInstr *Instr) {
536 switch (Instr->getOpcode()) {
537 default:
538 return false;
Quentin Colombetfa4ecb42015-08-27 23:47:10 +0000539 case AArch64::STRBBui:
540 case AArch64::STRHHui:
Tim Northover3b0846e2014-05-24 12:50:23 +0000541 case AArch64::STRBui:
542 case AArch64::STRHui:
543 case AArch64::STRWui:
544 case AArch64::STRXui:
545 case AArch64::STRSui:
546 case AArch64::STRDui:
547 case AArch64::STRQui:
548 // In case we have str xA, [xA, #imm], this is two different uses
549 // of xA and we cannot fold, otherwise the xA stored may be wrong,
550 // even if #imm == 0.
551 if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg())
552 return true;
553 }
554 return false;
555}
556
557/// Given the result of a reaching definition algorithm in ColorOpToReachedUses,
558/// Build the Use to Defs information and filter out obvious non-LOH candidates.
559/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
560/// In non-ADRPMode, non-LOH candidates are "uses" with several definition,
561/// i.e., no simple chain.
562/// \param ADRPMode -- \see initReachingDef.
563static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
564 const InstrToInstrs *ColorOpToReachedUses,
565 const MapRegToId &RegToId,
566 bool ADRPMode = false) {
567
568 SetOfMachineInstr NotCandidate;
569 unsigned NbReg = RegToId.size();
570 MapRegToId::const_iterator EndIt = RegToId.end();
571 for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) {
572 // If this color is never defined, continue.
573 if (ColorOpToReachedUses[CurReg].empty())
574 continue;
575
576 for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
577 for (const MachineInstr *MI : DefsIt.second) {
578 const MachineInstr *Def = DefsIt.first;
579 MapRegToId::const_iterator It;
580 // if all the reaching defs are not adrp, this use will not be
581 // simplifiable.
582 if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) ||
583 (!ADRPMode && !canDefBePartOfLOH(Def)) ||
584 (!ADRPMode && isCandidateStore(MI) &&
585 // store are LOH candidate iff the end of the chain is used as
586 // base.
587 ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt ||
588 It->second != CurReg))) {
589 NotCandidate.insert(MI);
590 continue;
591 }
592 // Do not consider self reaching as a simplifiable case for ADRP.
593 if (!ADRPMode || MI != DefsIt.first) {
594 UseToReachingDefs[MI].insert(DefsIt.first);
595 // If UsesIt has several reaching definitions, it is not
596 // candidate for simplificaton in non-ADRPMode.
597 if (!ADRPMode && UseToReachingDefs[MI].size() > 1)
598 NotCandidate.insert(MI);
599 }
600 }
601 }
602 }
603 for (const MachineInstr *Elem : NotCandidate) {
604 DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n");
605 // It would have been better if we could just remove the entry
606 // from the map. Because of that, we have to filter the garbage
607 // (second.empty) in the subsequence analysis.
608 UseToReachingDefs[Elem].clear();
609 }
610}
611
612/// Based on the use to defs information (in ADRPMode), compute the
613/// opportunities of LOH ADRP-related.
614static void computeADRP(const InstrToInstrs &UseToDefs,
615 AArch64FunctionInfo &AArch64FI,
616 const MachineDominatorTree *MDT) {
617 DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
618 for (const auto &Entry : UseToDefs) {
619 unsigned Size = Entry.second.size();
620 if (Size == 0)
621 continue;
622 if (Size == 1) {
623 const MachineInstr *L2 = *Entry.second.begin();
624 const MachineInstr *L1 = Entry.first;
625 if (!MDT->dominates(L2, L1)) {
626 DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
627 << '\n');
628 continue;
629 }
630 DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
Manman Ren39b37c02016-07-05 22:24:44 +0000631 AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, {L2, L1});
Tim Northover3b0846e2014-05-24 12:50:23 +0000632 ++NumADRPSimpleCandidate;
633 }
James Y Knightd7d9e102016-08-30 03:16:16 +0000634#ifndef NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000635 else if (Size == 2)
636 ++NumADRPComplexCandidate2;
637 else if (Size == 3)
638 ++NumADRPComplexCandidate3;
639 else
640 ++NumADRPComplexCandidateOther;
641#endif
642 // if Size < 1, the use should have been removed from the candidates
643 assert(Size >= 1 && "No reaching defs for that use!");
644 }
645}
646
647/// Check whether the given instruction can be the end of a LOH chain
648/// involving a load.
649static bool isCandidateLoad(const MachineInstr *Instr) {
650 switch (Instr->getOpcode()) {
651 default:
652 return false;
653 case AArch64::LDRSBWui:
654 case AArch64::LDRSBXui:
655 case AArch64::LDRSHWui:
656 case AArch64::LDRSHXui:
657 case AArch64::LDRSWui:
658 case AArch64::LDRBui:
659 case AArch64::LDRHui:
660 case AArch64::LDRWui:
661 case AArch64::LDRXui:
662 case AArch64::LDRSui:
663 case AArch64::LDRDui:
664 case AArch64::LDRQui:
665 if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)
666 return false;
667 return true;
668 }
669 // Unreachable.
670 return false;
671}
672
673/// Check whether the given instruction can load a litteral.
674static bool supportLoadFromLiteral(const MachineInstr *Instr) {
675 switch (Instr->getOpcode()) {
676 default:
677 return false;
678 case AArch64::LDRSWui:
679 case AArch64::LDRWui:
680 case AArch64::LDRXui:
681 case AArch64::LDRSui:
682 case AArch64::LDRDui:
683 case AArch64::LDRQui:
684 return true;
685 }
686 // Unreachable.
687 return false;
688}
689
690/// Check whether the given instruction is a LOH candidate.
691/// \param UseToDefs is used to check that Instr is at the end of LOH supported
692/// chain.
693/// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are
694/// already been filtered out.
695static bool isCandidate(const MachineInstr *Instr,
696 const InstrToInstrs &UseToDefs,
697 const MachineDominatorTree *MDT) {
698 if (!isCandidateLoad(Instr) && !isCandidateStore(Instr))
699 return false;
700
701 const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
702 if (Def->getOpcode() != AArch64::ADRP) {
703 // At this point, Def is ADDXri or LDRXui of the right type of
704 // symbol, because we filtered out the uses that were not defined
705 // by these kind of instructions (+ ADRP).
706
707 // Check if this forms a simple chain: each intermediate node must
708 // dominates the next one.
709 if (!MDT->dominates(Def, Instr))
710 return false;
711 // Move one node up in the simple chain.
712 if (UseToDefs.find(Def) ==
713 UseToDefs.end()
714 // The map may contain garbage we have to ignore.
715 ||
716 UseToDefs.find(Def)->second.empty())
717 return false;
718 Instr = Def;
719 Def = *UseToDefs.find(Def)->second.begin();
720 }
721 // Check if we reached the top of the simple chain:
722 // - top is ADRP.
723 // - check the simple chain property: each intermediate node must
724 // dominates the next one.
725 if (Def->getOpcode() == AArch64::ADRP)
726 return MDT->dominates(Def, Instr);
727 return false;
728}
729
730static bool registerADRCandidate(const MachineInstr &Use,
731 const InstrToInstrs &UseToDefs,
732 const InstrToInstrs *DefsPerColorToUses,
733 AArch64FunctionInfo &AArch64FI,
734 SetOfMachineInstr *InvolvedInLOHs,
735 const MapRegToId &RegToId) {
736 // Look for opportunities to turn ADRP -> ADD or
737 // ADRP -> LDR GOTPAGEOFF into ADR.
738 // If ADRP has more than one use. Give up.
739 if (Use.getOpcode() != AArch64::ADDXri &&
740 (Use.getOpcode() != AArch64::LDRXui ||
741 !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
742 return false;
743 InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
744 // The map may contain garbage that we need to ignore.
745 if (It == UseToDefs.end() || It->second.empty())
746 return false;
747 const MachineInstr &Def = **It->second.begin();
748 if (Def.getOpcode() != AArch64::ADRP)
749 return false;
750 // Check the number of users of ADRP.
751 const SetOfMachineInstr *Users =
752 getUses(DefsPerColorToUses,
753 RegToId.find(Def.getOperand(0).getReg())->second, Def);
754 if (Users->size() > 1) {
755 ++NumADRComplexCandidate;
756 return false;
757 }
758 ++NumADRSimpleCandidate;
759 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
760 "ADRP already involved in LOH.");
761 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
762 "ADD already involved in LOH.");
763 DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');
764
Benjamin Kramer3bc1edf2016-07-02 11:41:39 +0000765 AArch64FI.addLOHDirective(
766 Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd : MCLOH_AdrpLdrGot,
767 {&Def, &Use});
Tim Northover3b0846e2014-05-24 12:50:23 +0000768 return true;
769}
770
771/// Based on the use to defs information (in non-ADRPMode), compute the
772/// opportunities of LOH non-ADRP-related
773static void computeOthers(const InstrToInstrs &UseToDefs,
774 const InstrToInstrs *DefsPerColorToUses,
775 AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId,
776 const MachineDominatorTree *MDT) {
777 SetOfMachineInstr *InvolvedInLOHs = nullptr;
James Y Knightd7d9e102016-08-30 03:16:16 +0000778#ifndef NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000779 SetOfMachineInstr InvolvedInLOHsStorage;
780 InvolvedInLOHs = &InvolvedInLOHsStorage;
James Y Knightd7d9e102016-08-30 03:16:16 +0000781#endif // NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000782 DEBUG(dbgs() << "*** Compute LOH for Others\n");
783 // ADRP -> ADD/LDR -> LDR/STR pattern.
784 // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.
785
786 // FIXME: When the statistics are not important,
787 // This initial filtering loop can be merged into the next loop.
788 // Currently, we didn't do it to have the same code for both DEBUG and
789 // NDEBUG builds. Indeed, the iterator of the second loop would need
790 // to be changed.
791 SetOfMachineInstr PotentialCandidates;
792 SetOfMachineInstr PotentialADROpportunities;
793 for (auto &Use : UseToDefs) {
794 // If no definition is available, this is a non candidate.
795 if (Use.second.empty())
796 continue;
797 // Keep only instructions that are load or store and at the end of
798 // a ADRP -> ADD/LDR/Nothing chain.
799 // We already filtered out the no-chain cases.
800 if (!isCandidate(Use.first, UseToDefs, MDT)) {
801 PotentialADROpportunities.insert(Use.first);
802 continue;
803 }
804 PotentialCandidates.insert(Use.first);
805 }
806
807 // Make the following distinctions for statistics as the linker does
808 // know how to decode instructions:
809 // - ADD/LDR/Nothing make there different patterns.
810 // - LDR/STR make two different patterns.
811 // Hence, 6 - 1 base patterns.
812 // (because ADRP-> Nothing -> STR is not simplifiable)
813
814 // The linker is only able to have a simple semantic, i.e., if pattern A
815 // do B.
816 // However, we want to see the opportunity we may miss if we were able to
817 // catch more complex cases.
818
819 // PotentialCandidates are result of a chain ADRP -> ADD/LDR ->
820 // A potential candidate becomes a candidate, if its current immediate
821 // operand is zero and all nodes of the chain have respectively only one user
James Y Knightd7d9e102016-08-30 03:16:16 +0000822#ifndef NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000823 SetOfMachineInstr DefsOfPotentialCandidates;
824#endif
825 for (const MachineInstr *Candidate : PotentialCandidates) {
826 // Get the definition of the candidate i.e., ADD or LDR.
827 const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
828 // Record the elements of the chain.
829 const MachineInstr *L1 = Def;
830 const MachineInstr *L2 = nullptr;
831 unsigned ImmediateDefOpc = Def->getOpcode();
832 if (Def->getOpcode() != AArch64::ADRP) {
833 // Check the number of users of this node.
834 const SetOfMachineInstr *Users =
835 getUses(DefsPerColorToUses,
836 RegToId.find(Def->getOperand(0).getReg())->second, *Def);
837 if (Users->size() > 1) {
James Y Knightd7d9e102016-08-30 03:16:16 +0000838#ifndef NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000839 // if all the uses of this def are in potential candidate, this is
840 // a complex candidate of level 2.
841 bool IsLevel2 = true;
842 for (const MachineInstr *MI : *Users) {
843 if (!PotentialCandidates.count(MI)) {
844 ++NumTooCplxLvl2;
845 IsLevel2 = false;
846 break;
847 }
848 }
849 if (IsLevel2)
850 ++NumCplxLvl2;
James Y Knightd7d9e102016-08-30 03:16:16 +0000851#endif // NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000852 PotentialADROpportunities.insert(Def);
853 continue;
854 }
855 L2 = Def;
856 Def = *UseToDefs.find(Def)->second.begin();
857 L1 = Def;
858 } // else the element in the middle of the chain is nothing, thus
859 // Def already contains the first element of the chain.
860
861 // Check the number of users of the first node in the chain, i.e., ADRP
862 const SetOfMachineInstr *Users =
863 getUses(DefsPerColorToUses,
864 RegToId.find(Def->getOperand(0).getReg())->second, *Def);
865 if (Users->size() > 1) {
James Y Knightd7d9e102016-08-30 03:16:16 +0000866#ifndef NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000867 // if all the uses of this def are in the defs of the potential candidate,
868 // this is a complex candidate of level 1
869 if (DefsOfPotentialCandidates.empty()) {
870 // lazy init
871 DefsOfPotentialCandidates = PotentialCandidates;
872 for (const MachineInstr *Candidate : PotentialCandidates) {
873 if (!UseToDefs.find(Candidate)->second.empty())
874 DefsOfPotentialCandidates.insert(
875 *UseToDefs.find(Candidate)->second.begin());
876 }
877 }
878 bool Found = false;
879 for (auto &Use : *Users) {
880 if (!DefsOfPotentialCandidates.count(Use)) {
881 ++NumTooCplxLvl1;
882 Found = true;
883 break;
884 }
885 }
886 if (!Found)
887 ++NumCplxLvl1;
James Y Knightd7d9e102016-08-30 03:16:16 +0000888#endif // NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000889 continue;
890 }
891
892 bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
893 // If the chain is three instructions long and ldr is the second element,
894 // then this ldr must load form GOT, otherwise this is not a correct chain.
Quentin Colombetfa4ecb42015-08-27 23:47:10 +0000895 if (L2 && !IsL2Add &&
896 !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT))
Tim Northover3b0846e2014-05-24 12:50:23 +0000897 continue;
898 SmallVector<const MachineInstr *, 3> Args;
899 MCLOHType Kind;
900 if (isCandidateLoad(Candidate)) {
901 if (!L2) {
902 // At this point, the candidate LOH indicates that the ldr instruction
903 // may use a direct access to the symbol. There is not such encoding
904 // for loads of byte and half.
905 if (!supportLoadFromLiteral(Candidate))
906 continue;
907
908 DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
909 << '\n');
910 Kind = MCLOH_AdrpLdr;
911 Args.push_back(L1);
912 Args.push_back(Candidate);
913 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
914 "L1 already involved in LOH.");
915 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
916 "Candidate already involved in LOH.");
917 ++NumADRPToLDR;
918 } else {
919 DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
920 << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
921 << '\n');
922
923 Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
924 Args.push_back(L1);
925 Args.push_back(L2);
926 Args.push_back(Candidate);
927
928 PotentialADROpportunities.remove(L2);
929 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
930 "L1 already involved in LOH.");
931 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
932 "L2 already involved in LOH.");
933 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
934 "Candidate already involved in LOH.");
James Y Knightd7d9e102016-08-30 03:16:16 +0000935#ifndef NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000936 // get the immediate of the load
937 if (Candidate->getOperand(2).getImm() == 0)
938 if (ImmediateDefOpc == AArch64::ADDXri)
939 ++NumADDToLDR;
940 else
941 ++NumLDRToLDR;
942 else if (ImmediateDefOpc == AArch64::ADDXri)
943 ++NumADDToLDRWithImm;
944 else
945 ++NumLDRToLDRWithImm;
James Y Knightd7d9e102016-08-30 03:16:16 +0000946#endif // NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000947 }
948 } else {
949 if (ImmediateDefOpc == AArch64::ADRP)
950 continue;
951 else {
952
953 DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
954 << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
955 << '\n');
956
957 Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
958 Args.push_back(L1);
959 Args.push_back(L2);
960 Args.push_back(Candidate);
961
962 PotentialADROpportunities.remove(L2);
963 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
964 "L1 already involved in LOH.");
965 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
966 "L2 already involved in LOH.");
967 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
968 "Candidate already involved in LOH.");
James Y Knightd7d9e102016-08-30 03:16:16 +0000969#ifndef NDEBUG
Tim Northover3b0846e2014-05-24 12:50:23 +0000970 // get the immediate of the store
971 if (Candidate->getOperand(2).getImm() == 0)
972 if (ImmediateDefOpc == AArch64::ADDXri)
973 ++NumADDToSTR;
974 else
975 ++NumLDRToSTR;
976 else if (ImmediateDefOpc == AArch64::ADDXri)
977 ++NumADDToSTRWithImm;
978 else
979 ++NumLDRToSTRWithImm;
980#endif // DEBUG
981 }
982 }
983 AArch64FI.addLOHDirective(Kind, Args);
984 }
985
986 // Now, we grabbed all the big patterns, check ADR opportunities.
987 for (const MachineInstr *Candidate : PotentialADROpportunities)
988 registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
989 InvolvedInLOHs, RegToId);
990}
991
992/// Look for every register defined by potential LOHs candidates.
993/// Map these registers with dense id in @p RegToId and vice-versa in
994/// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
Pete Cooperd987cd22015-03-11 21:40:25 +0000995static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
Tim Northover3b0846e2014-05-24 12:50:23 +0000996 MapIdToReg &IdToReg,
997 const TargetRegisterInfo *TRI) {
998 unsigned CurRegId = 0;
999 if (!PreCollectRegister) {
1000 unsigned NbReg = TRI->getNumRegs();
1001 for (; CurRegId < NbReg; ++CurRegId) {
1002 RegToId[CurRegId] = CurRegId;
1003 DEBUG(IdToReg.push_back(CurRegId));
1004 DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches"));
1005 }
1006 return;
1007 }
1008
1009 DEBUG(dbgs() << "** Collect Involved Register\n");
1010 for (const auto &MBB : MF) {
1011 for (const MachineInstr &MI : MBB) {
Quentin Colombetfa4ecb42015-08-27 23:47:10 +00001012 if (!canDefBePartOfLOH(&MI) &&
1013 !isCandidateLoad(&MI) && !isCandidateStore(&MI))
Tim Northover3b0846e2014-05-24 12:50:23 +00001014 continue;
1015
1016 // Process defs
1017 for (MachineInstr::const_mop_iterator IO = MI.operands_begin(),
1018 IOEnd = MI.operands_end();
1019 IO != IOEnd; ++IO) {
1020 if (!IO->isReg() || !IO->isDef())
1021 continue;
1022 unsigned CurReg = IO->getReg();
1023 for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI)
1024 if (RegToId.find(*AI) == RegToId.end()) {
1025 DEBUG(IdToReg.push_back(*AI);
1026 assert(IdToReg[CurRegId] == *AI &&
1027 "Reg index mismatches insertion index."));
1028 RegToId[*AI] = CurRegId++;
1029 DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n');
1030 }
1031 }
1032 }
1033 }
1034}
1035
1036bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
Andrew Kaylor1ac98bb2016-04-25 21:58:52 +00001037 if (skipFunction(*MF.getFunction()))
1038 return false;
1039
Eric Christopher6c901622015-01-28 03:51:33 +00001040 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +00001041 const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
1042
1043 MapRegToId RegToId;
1044 MapIdToReg IdToReg;
1045 AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>();
1046 assert(AArch64FI && "No MachineFunctionInfo for this function!");
1047
1048 DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n');
1049
1050 collectInvolvedReg(MF, RegToId, IdToReg, TRI);
1051 if (RegToId.empty())
1052 return false;
1053
1054 MachineInstr *DummyOp = nullptr;
1055 if (BasicBlockScopeOnly) {
Eric Christopher125898a2015-01-30 01:10:24 +00001056 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +00001057 // For local analysis, create a dummy operation to record uses that are not
1058 // local.
1059 DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
1060 }
1061
1062 unsigned NbReg = RegToId.size();
1063 bool Modified = false;
1064
1065 // Start with ADRP.
Dylan Noblesmithb06f77b2014-08-26 02:03:43 +00001066 InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg];
Tim Northover3b0846e2014-05-24 12:50:23 +00001067
1068 // Compute the reaching def in ADRP mode, meaning ADRP definitions
1069 // are first considered as uses.
1070 reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp);
1071 DEBUG(dbgs() << "ADRP reaching defs\n");
1072 DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
1073
1074 // Translate the definition to uses map into a use to definitions map to ease
1075 // statistic computation.
1076 InstrToInstrs ADRPToReachingDefs;
1077 reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true);
1078
1079 // Compute LOH for ADRP.
1080 computeADRP(ADRPToReachingDefs, *AArch64FI, MDT);
Dylan Noblesmithb06f77b2014-08-26 02:03:43 +00001081 delete[] ColorOpToReachedUses;
1082
Tim Northover3b0846e2014-05-24 12:50:23 +00001083 // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern.
Dylan Noblesmithb06f77b2014-08-26 02:03:43 +00001084 ColorOpToReachedUses = new InstrToInstrs[NbReg];
Tim Northover3b0846e2014-05-24 12:50:23 +00001085
1086 // first perform a regular reaching def analysis.
1087 reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp);
1088 DEBUG(dbgs() << "All reaching defs\n");
1089 DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
1090
1091 // Turn that into a use to defs to ease statistic computation.
1092 InstrToInstrs UsesToReachingDefs;
1093 reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false);
1094
1095 // Compute other than AdrpAdrp LOH.
1096 computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId,
1097 MDT);
Dylan Noblesmithb06f77b2014-08-26 02:03:43 +00001098 delete[] ColorOpToReachedUses;
Tim Northover3b0846e2014-05-24 12:50:23 +00001099
1100 if (BasicBlockScopeOnly)
1101 MF.DeleteMachineInstr(DummyOp);
1102
1103 return Modified;
1104}
1105
1106/// createAArch64CollectLOHPass - returns an instance of the Statistic for
1107/// linker optimization pass.
1108FunctionPass *llvm::createAArch64CollectLOHPass() {
1109 return new AArch64CollectLOH();
1110}