blob: 7783c116400e42d75dd19e54d278ef0566b7c1d7 [file] [log] [blame]
Eugene Zelenko60433b62017-10-05 00:33:50 +00001//===- X86OptimizeLEAs.cpp - optimize usage of LEA instructions -----------===//
Alexey Bataev7cf32472015-12-04 10:53:15 +00002//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the pass that performs some optimizations with LEA
Andrey Turetskiy45b22a42016-05-19 10:18:29 +000011// instructions in order to improve performance and code size.
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +000012// Currently, it does two things:
13// 1) If there are two LEA instructions calculating addresses which only differ
14// by displacement inside a basic block, one of them is removed.
15// 2) Address calculations in load and store instructions are replaced by
Alexey Bataev7cf32472015-12-04 10:53:15 +000016// existing LEA def registers where possible.
17//
18//===----------------------------------------------------------------------===//
19
Eugene Zelenko60433b62017-10-05 00:33:50 +000020#include "MCTargetDesc/X86BaseInfo.h"
Alexey Bataev7cf32472015-12-04 10:53:15 +000021#include "X86.h"
22#include "X86InstrInfo.h"
23#include "X86Subtarget.h"
Eugene Zelenko60433b62017-10-05 00:33:50 +000024#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/DenseMapInfo.h"
26#include "llvm/ADT/Hashing.h"
27#include "llvm/ADT/SmallVector.h"
Alexey Bataev7cf32472015-12-04 10:53:15 +000028#include "llvm/ADT/Statistic.h"
Eugene Zelenko60433b62017-10-05 00:33:50 +000029#include "llvm/CodeGen/MachineBasicBlock.h"
Jatin Bhateja328199e2017-12-01 14:07:38 +000030#include "llvm/CodeGen/MachineDominators.h"
Eugene Zelenko60433b62017-10-05 00:33:50 +000031#include "llvm/CodeGen/MachineFunction.h"
Alexey Bataev7cf32472015-12-04 10:53:15 +000032#include "llvm/CodeGen/MachineFunctionPass.h"
Eugene Zelenko60433b62017-10-05 00:33:50 +000033#include "llvm/CodeGen/MachineInstr.h"
Alexey Bataev7cf32472015-12-04 10:53:15 +000034#include "llvm/CodeGen/MachineInstrBuilder.h"
Andrey Turetskiy0babd262016-02-20 10:58:28 +000035#include "llvm/CodeGen/MachineOperand.h"
Alexey Bataev7cf32472015-12-04 10:53:15 +000036#include "llvm/CodeGen/MachineRegisterInfo.h"
David Blaikieb3bde2e2017-11-17 01:07:10 +000037#include "llvm/CodeGen/TargetOpcodes.h"
38#include "llvm/CodeGen/TargetRegisterInfo.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000039#include "llvm/IR/DebugInfoMetadata.h"
Eugene Zelenko60433b62017-10-05 00:33:50 +000040#include "llvm/IR/DebugLoc.h"
Alexey Bataev7cf32472015-12-04 10:53:15 +000041#include "llvm/IR/Function.h"
Eugene Zelenko60433b62017-10-05 00:33:50 +000042#include "llvm/MC/MCInstrDesc.h"
43#include "llvm/Support/CommandLine.h"
Alexey Bataev7cf32472015-12-04 10:53:15 +000044#include "llvm/Support/Debug.h"
Eugene Zelenko60433b62017-10-05 00:33:50 +000045#include "llvm/Support/ErrorHandling.h"
46#include "llvm/Support/MathExtras.h"
Alexey Bataev7cf32472015-12-04 10:53:15 +000047#include "llvm/Support/raw_ostream.h"
Eugene Zelenko60433b62017-10-05 00:33:50 +000048#include <cassert>
49#include <cstdint>
50#include <iterator>
Alexey Bataev7cf32472015-12-04 10:53:15 +000051
52using namespace llvm;
53
54#define DEBUG_TYPE "x86-optimize-LEAs"
55
Andrey Turetskiy9994b882016-02-20 11:11:55 +000056static cl::opt<bool>
57 DisableX86LEAOpt("disable-x86-lea-opt", cl::Hidden,
58 cl::desc("X86: Disable LEA optimizations."),
59 cl::init(false));
Alexey Bataev7b72b652015-12-17 07:34:39 +000060
Alexey Bataev7cf32472015-12-04 10:53:15 +000061STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
Jatin Bhateja328199e2017-12-01 14:07:38 +000062STATISTIC(NumFactoredLEAs, "Number of LEAs factorized");
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +000063STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
Alexey Bataev7cf32472015-12-04 10:53:15 +000064
Andrey Turetskiybca0f992016-02-04 08:57:03 +000065/// \brief Returns true if two machine operands are identical and they are not
66/// physical registers.
67static inline bool isIdenticalOp(const MachineOperand &MO1,
68 const MachineOperand &MO2);
69
Jatin Bhateja328199e2017-12-01 14:07:38 +000070/// \brief Returns true if two machine instructions have identical operands.
71static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1,
72 const MachineOperand &MO2);
73
Andrey Turetskiy0babd262016-02-20 10:58:28 +000074/// \brief Returns true if two address displacement operands are of the same
75/// type and use the same symbol/index/address regardless of the offset.
76static bool isSimilarDispOp(const MachineOperand &MO1,
77 const MachineOperand &MO2);
78
Andrey Turetskiybca0f992016-02-04 08:57:03 +000079/// \brief Returns true if the instruction is LEA.
80static inline bool isLEA(const MachineInstr &MI);
81
Jatin Bhateja328199e2017-12-01 14:07:38 +000082/// \brief Returns true if Definition of Operand is a copylike instruction.
83static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr);
84
Benjamin Kramerb7d33112016-08-06 11:13:10 +000085namespace {
Eugene Zelenko60433b62017-10-05 00:33:50 +000086
Andrey Turetskiybca0f992016-02-04 08:57:03 +000087/// A key based on instruction's memory operands.
88class MemOpKey {
89public:
90 MemOpKey(const MachineOperand *Base, const MachineOperand *Scale,
91 const MachineOperand *Index, const MachineOperand *Segment,
Jatin Bhateja328199e2017-12-01 14:07:38 +000092 const MachineOperand *Disp, bool DispCheck = false)
93 : Disp(Disp), DeepCheck(DispCheck) {
Andrey Turetskiybca0f992016-02-04 08:57:03 +000094 Operands[0] = Base;
95 Operands[1] = Scale;
96 Operands[2] = Index;
97 Operands[3] = Segment;
98 }
99
Jatin Bhateja328199e2017-12-01 14:07:38 +0000100 /// Checks operands of MemOpKey are identical, if Base or Index
101 /// operand definitions are of kind SUBREG_TO_REG then compare
102 /// operands of defining MI.
103 bool performDeepCheck(const MemOpKey &Other) const {
104 MachineInstr *MI = const_cast<MachineInstr *>(Operands[0]->getParent());
105 MachineRegisterInfo *MRI = MI->getRegInfo();
106
107 for (int i = 0; i < 4; i++) {
108 bool CopyLike = isDefCopyLike(MRI, *Operands[i]);
109 if (CopyLike && !isIdenticalMI(MRI, *Operands[i], *Other.Operands[i]))
110 return false;
111 else if (!CopyLike && !isIdenticalOp(*Operands[i], *Other.Operands[i]))
112 return false;
113 }
114 return isIdenticalOp(*Disp, *Other.Disp);
115 }
116
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000117 bool operator==(const MemOpKey &Other) const {
Jatin Bhateja328199e2017-12-01 14:07:38 +0000118 if (DeepCheck)
119 return performDeepCheck(Other);
120
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000121 // Addresses' bases, scales, indices and segments must be identical.
122 for (int i = 0; i < 4; ++i)
123 if (!isIdenticalOp(*Operands[i], *Other.Operands[i]))
124 return false;
125
Andrey Turetskiy0babd262016-02-20 10:58:28 +0000126 // Addresses' displacements don't have to be exactly the same. It only
127 // matters that they use the same symbol/index/address. Immediates' or
128 // offsets' differences will be taken care of during instruction
129 // substitution.
130 return isSimilarDispOp(*Disp, *Other.Disp);
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000131 }
132
133 // Address' base, scale, index and segment operands.
134 const MachineOperand *Operands[4];
135
136 // Address' displacement operand.
137 const MachineOperand *Disp;
Jatin Bhateja328199e2017-12-01 14:07:38 +0000138
139 // If true checks Address' base, index, segment and
140 // displacement are identical, in additions if base/index
141 // are defined by copylike instruction then futher
142 // compare the operands of the defining instruction.
143 bool DeepCheck;
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000144};
Eugene Zelenko60433b62017-10-05 00:33:50 +0000145
Benjamin Kramerb7d33112016-08-06 11:13:10 +0000146} // end anonymous namespace
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000147
148/// Provide DenseMapInfo for MemOpKey.
149namespace llvm {
Eugene Zelenko60433b62017-10-05 00:33:50 +0000150
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000151template <> struct DenseMapInfo<MemOpKey> {
Eugene Zelenko60433b62017-10-05 00:33:50 +0000152 using PtrInfo = DenseMapInfo<const MachineOperand *>;
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000153
154 static inline MemOpKey getEmptyKey() {
155 return MemOpKey(PtrInfo::getEmptyKey(), PtrInfo::getEmptyKey(),
156 PtrInfo::getEmptyKey(), PtrInfo::getEmptyKey(),
157 PtrInfo::getEmptyKey());
158 }
159
160 static inline MemOpKey getTombstoneKey() {
161 return MemOpKey(PtrInfo::getTombstoneKey(), PtrInfo::getTombstoneKey(),
162 PtrInfo::getTombstoneKey(), PtrInfo::getTombstoneKey(),
163 PtrInfo::getTombstoneKey());
164 }
165
166 static unsigned getHashValue(const MemOpKey &Val) {
167 // Checking any field of MemOpKey is enough to determine if the key is
168 // empty or tombstone.
Jatin Bhateja328199e2017-12-01 14:07:38 +0000169 hash_code Hash(0);
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000170 assert(Val.Disp != PtrInfo::getEmptyKey() && "Cannot hash the empty key");
171 assert(Val.Disp != PtrInfo::getTombstoneKey() &&
172 "Cannot hash the tombstone key");
173
Jatin Bhateja328199e2017-12-01 14:07:38 +0000174 auto getMIHash = [](MachineInstr *MI) -> hash_code {
175 hash_code h(0);
176 for (unsigned i = 1, e = MI->getNumOperands(); i < e; i++)
177 h = hash_combine(h, MI->getOperand(i));
178 return h;
179 };
180
181 const MachineOperand &Base = *Val.Operands[0];
182 const MachineOperand &Index = *Val.Operands[2];
183 MachineInstr *MI = const_cast<MachineInstr *>(Base.getParent());
184 MachineRegisterInfo *MRI = MI->getRegInfo();
185
186 if (isDefCopyLike(MRI, Base))
187 Hash = getMIHash(MRI->getVRegDef(Base.getReg()));
188 else
189 Hash = hash_combine(Hash, Base);
190
191 if (isDefCopyLike(MRI, Index))
192 Hash = getMIHash(MRI->getVRegDef(Index.getReg()));
193 else
194 Hash = hash_combine(Hash, Index);
195
196 Hash = hash_combine(Hash, *Val.Operands[1], *Val.Operands[3]);
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000197
198 // If the address displacement is an immediate, it should not affect the
199 // hash so that memory operands which differ only be immediate displacement
Andrey Turetskiy0babd262016-02-20 10:58:28 +0000200 // would have the same hash. If the address displacement is something else,
201 // we should reflect symbol/index/address in the hash.
202 switch (Val.Disp->getType()) {
203 case MachineOperand::MO_Immediate:
204 break;
205 case MachineOperand::MO_ConstantPoolIndex:
206 case MachineOperand::MO_JumpTableIndex:
207 Hash = hash_combine(Hash, Val.Disp->getIndex());
208 break;
209 case MachineOperand::MO_ExternalSymbol:
210 Hash = hash_combine(Hash, Val.Disp->getSymbolName());
211 break;
212 case MachineOperand::MO_GlobalAddress:
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000213 Hash = hash_combine(Hash, Val.Disp->getGlobal());
Andrey Turetskiy0babd262016-02-20 10:58:28 +0000214 break;
215 case MachineOperand::MO_BlockAddress:
216 Hash = hash_combine(Hash, Val.Disp->getBlockAddress());
217 break;
218 case MachineOperand::MO_MCSymbol:
219 Hash = hash_combine(Hash, Val.Disp->getMCSymbol());
220 break;
Andrey Turetskiyb4056062016-04-26 12:18:12 +0000221 case MachineOperand::MO_MachineBasicBlock:
222 Hash = hash_combine(Hash, Val.Disp->getMBB());
223 break;
Andrey Turetskiy0babd262016-02-20 10:58:28 +0000224 default:
225 llvm_unreachable("Invalid address displacement operand");
226 }
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000227
228 return (unsigned)Hash;
229 }
230
231 static bool isEqual(const MemOpKey &LHS, const MemOpKey &RHS) {
232 // Checking any field of MemOpKey is enough to determine if the key is
233 // empty or tombstone.
234 if (RHS.Disp == PtrInfo::getEmptyKey())
235 return LHS.Disp == PtrInfo::getEmptyKey();
236 if (RHS.Disp == PtrInfo::getTombstoneKey())
237 return LHS.Disp == PtrInfo::getTombstoneKey();
238 return LHS == RHS;
239 }
240};
Eugene Zelenko60433b62017-10-05 00:33:50 +0000241
242} // end namespace llvm
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000243
Benjamin Kramerb7d33112016-08-06 11:13:10 +0000244/// \brief Returns a hash table key based on memory operands of \p MI. The
245/// number of the first memory operand of \p MI is specified through \p N.
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000246static inline MemOpKey getMemOpKey(const MachineInstr &MI, unsigned N) {
247 assert((isLEA(MI) || MI.mayLoadOrStore()) &&
248 "The instruction must be a LEA, a load or a store");
249 return MemOpKey(&MI.getOperand(N + X86::AddrBaseReg),
250 &MI.getOperand(N + X86::AddrScaleAmt),
251 &MI.getOperand(N + X86::AddrIndexReg),
252 &MI.getOperand(N + X86::AddrSegmentReg),
253 &MI.getOperand(N + X86::AddrDisp));
254}
255
Jatin Bhateja328199e2017-12-01 14:07:38 +0000256static inline MemOpKey getMemOpCSEKey(const MachineInstr &MI, unsigned N) {
257 static MachineOperand DummyScale = MachineOperand::CreateImm(1);
258 assert((isLEA(MI) || MI.mayLoadOrStore()) &&
259 "The instruction must be a LEA, a load or a store");
260 return MemOpKey(&MI.getOperand(N + X86::AddrBaseReg), &DummyScale,
261 &MI.getOperand(N + X86::AddrIndexReg),
262 &MI.getOperand(N + X86::AddrSegmentReg),
263 &MI.getOperand(N + X86::AddrDisp), true);
264}
265
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000266static inline bool isIdenticalOp(const MachineOperand &MO1,
267 const MachineOperand &MO2) {
268 return MO1.isIdenticalTo(MO2) &&
269 (!MO1.isReg() ||
270 !TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
271}
272
Jatin Bhateja328199e2017-12-01 14:07:38 +0000273static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1,
274 const MachineOperand &MO2) {
275 MachineInstr *MI1 = nullptr;
276 MachineInstr *MI2 = nullptr;
277 if (!MO1.isReg() || !MO2.isReg())
278 return false;
279
280 MI1 = MRI->getVRegDef(MO1.getReg());
281 MI2 = MRI->getVRegDef(MO2.getReg());
282 if (!MI1 || !MI2)
283 return false;
284 if (MI1->getOpcode() != MI2->getOpcode())
285 return false;
286 if (MI1->getNumOperands() != MI2->getNumOperands())
287 return false;
288 for (unsigned i = 1, e = MI1->getNumOperands(); i < e; ++i)
289 if (!isIdenticalOp(MI1->getOperand(i), MI2->getOperand(i)))
290 return false;
291 return true;
292}
293
Justin Bogner38e52172016-02-24 07:58:02 +0000294#ifndef NDEBUG
295static bool isValidDispOp(const MachineOperand &MO) {
296 return MO.isImm() || MO.isCPI() || MO.isJTI() || MO.isSymbol() ||
Andrey Turetskiyb4056062016-04-26 12:18:12 +0000297 MO.isGlobal() || MO.isBlockAddress() || MO.isMCSymbol() || MO.isMBB();
Justin Bogner38e52172016-02-24 07:58:02 +0000298}
299#endif
300
Andrey Turetskiy0babd262016-02-20 10:58:28 +0000301static bool isSimilarDispOp(const MachineOperand &MO1,
302 const MachineOperand &MO2) {
303 assert(isValidDispOp(MO1) && isValidDispOp(MO2) &&
304 "Address displacement operand is not valid");
305 return (MO1.isImm() && MO2.isImm()) ||
306 (MO1.isCPI() && MO2.isCPI() && MO1.getIndex() == MO2.getIndex()) ||
307 (MO1.isJTI() && MO2.isJTI() && MO1.getIndex() == MO2.getIndex()) ||
308 (MO1.isSymbol() && MO2.isSymbol() &&
309 MO1.getSymbolName() == MO2.getSymbolName()) ||
310 (MO1.isGlobal() && MO2.isGlobal() &&
311 MO1.getGlobal() == MO2.getGlobal()) ||
312 (MO1.isBlockAddress() && MO2.isBlockAddress() &&
313 MO1.getBlockAddress() == MO2.getBlockAddress()) ||
314 (MO1.isMCSymbol() && MO2.isMCSymbol() &&
Andrey Turetskiyb4056062016-04-26 12:18:12 +0000315 MO1.getMCSymbol() == MO2.getMCSymbol()) ||
316 (MO1.isMBB() && MO2.isMBB() && MO1.getMBB() == MO2.getMBB());
Andrey Turetskiy0babd262016-02-20 10:58:28 +0000317}
318
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000319static inline bool isLEA(const MachineInstr &MI) {
320 unsigned Opcode = MI.getOpcode();
321 return Opcode == X86::LEA16r || Opcode == X86::LEA32r ||
322 Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
323}
324
Jatin Bhateja328199e2017-12-01 14:07:38 +0000325static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr) {
326 bool isInstrErased = !(Opr.isReg() && Opr.getParent()->getParent());
327 if (!Opr.isReg() || isInstrErased ||
328 TargetRegisterInfo::isPhysicalRegister(Opr.getReg()))
329 return false;
330 MachineInstr *MI = MRI->getVRegDef(Opr.getReg());
331 return MI && MI->isCopyLike();
332}
333
Alexey Bataev7cf32472015-12-04 10:53:15 +0000334namespace {
Eugene Zelenko60433b62017-10-05 00:33:50 +0000335
Jatin Bhateja328199e2017-12-01 14:07:38 +0000336/// This class captures the functions and attributes
337/// needed to factorize LEA within and across basic
338/// blocks.LEA instruction with same BASE,OFFSET and
339/// INDEX are the candidates for factorization.
340class FactorizeLEAOpt {
341public:
342 using LEAListT = std::list<MachineInstr *>;
343 using LEAMapT = DenseMap<MemOpKey, LEAListT>;
344 using ValueT = DenseMap<MemOpKey, unsigned>;
345 using ScopeEntryT = std::pair<MachineBasicBlock *, ValueT>;
346 using ScopeStackT = std::vector<ScopeEntryT>;
347
348 FactorizeLEAOpt() = default;
349 FactorizeLEAOpt(const FactorizeLEAOpt &) = delete;
350 FactorizeLEAOpt &operator=(const FactorizeLEAOpt &) = delete;
351
352 void performCleanup() {
353 for (auto LEA : removedLEAs)
354 LEA->eraseFromParent();
355 LEAs.clear();
356 Stack.clear();
357 removedLEAs.clear();
358 }
359
360 LEAMapT &getLEAMap() { return LEAs; }
361 ScopeEntryT *getTopScope() { return &Stack.back(); }
362
363 void addForLazyRemoval(MachineInstr *Instr) { removedLEAs.insert(Instr); }
364
365 bool checkIfScheduledForRemoval(MachineInstr *Instr) {
366 return removedLEAs.find(Instr) != removedLEAs.end();
367 }
368
369 /// Push the ScopeEntry for the BasicBlock over Stack.
370 /// Also traverses over list of instruction and update
371 /// LEAs Map and ScopeEntry for each LEA instruction
372 /// found using insertLEA().
373 void collectDataForBasicBlock(MachineBasicBlock *MBB);
374
375 /// Stores the size of MachineInstr list corrosponding
376 /// to key K from LEAs MAP into the ScopeEntry of
377 /// the basic block, then insert the LEA at the beginning
378 /// of the list.
379 void insertLEA(MachineInstr *MI);
380
381 /// Pops out ScopeEntry of top most BasicBlock from the stack
382 /// and remove the LEA instructions contained in the scope
383 /// from the LEAs Map.
384 void removeDataForBasicBlock();
385
386 /// If LEA contains Physical Registers then its not a candidate
387 /// for factorizations since physical registers may violate SSA
388 /// semantics of MI.
389 bool containsPhyReg(MachineInstr *MI, unsigned RecLevel);
390
391private:
392 ScopeStackT Stack;
393 LEAMapT LEAs;
394 std::set<MachineInstr *> removedLEAs;
395};
396
397void FactorizeLEAOpt::collectDataForBasicBlock(MachineBasicBlock *MBB) {
398 ValueT EmptyMap;
399 ScopeEntryT SE = std::make_pair(MBB, EmptyMap);
400 Stack.push_back(SE);
401 for (auto &MI : *MBB) {
402 if (isLEA(MI))
403 insertLEA(&MI);
404 }
405}
406
407void FactorizeLEAOpt::removeDataForBasicBlock() {
408 ScopeEntryT &SE = Stack.back();
409 for (auto MapEntry : SE.second) {
410 LEAMapT::iterator Itr = LEAs.find(MapEntry.first);
411 assert((Itr != LEAs.end()) &&
412 "LEAs map must have a node corresponding to ScopeEntry's Key.");
413
414 while (((*Itr).second.size() > MapEntry.second))
415 (*Itr).second.pop_front();
416 // If list goes empty remove entry from LEAs Map.
417 if ((*Itr).second.empty())
418 LEAs.erase(Itr);
419 }
420 Stack.pop_back();
421}
422
423bool FactorizeLEAOpt::containsPhyReg(MachineInstr *MI, unsigned RecLevel) {
424 if (!MI || !RecLevel)
425 return false;
426
427 MachineRegisterInfo *MRI = MI->getRegInfo();
428 for (auto Operand : MI->operands()) {
429 if (!Operand.isReg())
430 continue;
431 if (TargetRegisterInfo::isPhysicalRegister(Operand.getReg()))
432 return true;
433 MachineInstr *OperDefMI = MRI->getVRegDef(Operand.getReg());
434 if (OperDefMI && (MI != OperDefMI) && OperDefMI->isCopyLike() &&
435 containsPhyReg(OperDefMI, RecLevel - 1))
436 return true;
437 }
438 return false;
439}
440
441void FactorizeLEAOpt::insertLEA(MachineInstr *MI) {
442 unsigned lsize;
443 if (containsPhyReg(MI, 2))
444 return;
445
446 // Factorization is beneficial only for complex LEAs.
447 MachineOperand &Base = MI->getOperand(1);
448 MachineOperand &Index = MI->getOperand(3);
449 MachineOperand &Offset = MI->getOperand(4);
450 if ((Offset.isImm() && !Offset.getImm()) ||
451 (!Base.isReg() || !Base.getReg()) || (!Index.isReg() || !Index.getReg()))
452 return;
453
454 MemOpKey Key = getMemOpCSEKey(*MI, 1);
455 ScopeEntryT *TopScope = getTopScope();
456
457 LEAMapT::iterator Itr = LEAs.find(Key);
458 if (Itr == LEAs.end()) {
459 lsize = 0;
460 LEAs[Key].push_front(MI);
461 } else {
462 lsize = (*Itr).second.size();
463 (*Itr).second.push_front(MI);
464 }
465 if (TopScope->second.find(Key) == TopScope->second.end())
466 TopScope->second[Key] = lsize;
467}
468
Alexey Bataev7cf32472015-12-04 10:53:15 +0000469class OptimizeLEAPass : public MachineFunctionPass {
470public:
471 OptimizeLEAPass() : MachineFunctionPass(ID) {}
472
Mehdi Amini117296c2016-10-01 02:56:57 +0000473 StringRef getPassName() const override { return "X86 LEA Optimize"; }
Alexey Bataev7cf32472015-12-04 10:53:15 +0000474
475 /// \brief Loop over all of the basic blocks, replacing address
476 /// calculations in load and store instructions, if it's already
477 /// been calculated by LEA. Also, remove redundant LEAs.
478 bool runOnMachineFunction(MachineFunction &MF) override;
479
Jatin Bhateja328199e2017-12-01 14:07:38 +0000480 void getAnalysisUsage(AnalysisUsage &AU) const override {
481 AU.setPreservesCFG();
482 MachineFunctionPass::getAnalysisUsage(AU);
483 AU.addRequired<MachineDominatorTree>();
484 }
485
Alexey Bataev7cf32472015-12-04 10:53:15 +0000486private:
Eugene Zelenko60433b62017-10-05 00:33:50 +0000487 using MemOpMap = DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>>;
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000488
Alexey Bataev7cf32472015-12-04 10:53:15 +0000489 /// \brief Returns a distance between two instructions inside one basic block.
490 /// Negative result means, that instructions occur in reverse order.
491 int calcInstrDist(const MachineInstr &First, const MachineInstr &Last);
492
493 /// \brief Choose the best \p LEA instruction from the \p List to replace
494 /// address calculation in \p MI instruction. Return the address displacement
Simon Pilgrim9d15fb32016-11-17 19:03:05 +0000495 /// and the distance between \p MI and the chosen \p BestLEA in
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000496 /// \p AddrDispShift and \p Dist.
Alexey Bataev7cf32472015-12-04 10:53:15 +0000497 bool chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List,
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000498 const MachineInstr &MI, MachineInstr *&BestLEA,
Alexey Bataev7cf32472015-12-04 10:53:15 +0000499 int64_t &AddrDispShift, int &Dist);
500
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000501 /// \brief Returns the difference between addresses' displacements of \p MI1
502 /// and \p MI2. The numbers of the first memory operands for the instructions
503 /// are specified through \p N1 and \p N2.
504 int64_t getAddrDispShift(const MachineInstr &MI1, unsigned N1,
505 const MachineInstr &MI2, unsigned N2) const;
Alexey Bataev7cf32472015-12-04 10:53:15 +0000506
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000507 /// \brief Returns true if the \p Last LEA instruction can be replaced by the
508 /// \p First. The difference between displacements of the addresses calculated
509 /// by these LEAs is returned in \p AddrDispShift. It'll be used for proper
510 /// replacement of the \p Last LEA's uses with the \p First's def register.
511 bool isReplaceable(const MachineInstr &First, const MachineInstr &Last,
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000512 int64_t &AddrDispShift) const;
Alexey Bataev7cf32472015-12-04 10:53:15 +0000513
Alexey Bataev28f0c5e2016-01-11 11:52:29 +0000514 /// \brief Find all LEA instructions in the basic block. Also, assign position
515 /// numbers to all instructions in the basic block to speed up calculation of
516 /// distance between them.
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000517 void findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs);
Alexey Bataev7cf32472015-12-04 10:53:15 +0000518
519 /// \brief Removes redundant address calculations.
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000520 bool removeRedundantAddrCalc(MemOpMap &LEAs);
Alexey Bataev7cf32472015-12-04 10:53:15 +0000521
Andrew Ng03e35b62017-04-28 08:44:30 +0000522 /// Replace debug value MI with a new debug value instruction using register
523 /// VReg with an appropriate offset and DIExpression to incorporate the
524 /// address displacement AddrDispShift. Return new debug value instruction.
525 MachineInstr *replaceDebugValue(MachineInstr &MI, unsigned VReg,
526 int64_t AddrDispShift);
527
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000528 /// \brief Removes LEAs which calculate similar addresses.
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000529 bool removeRedundantLEAs(MemOpMap &LEAs);
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000530
Jatin Bhateja328199e2017-12-01 14:07:38 +0000531 /// \brief Visit over basic blocks, collect LEAs in a scoped
532 /// hash map (FactorizeLEAOpt::LEAs) and try to factor them out.
533 bool FactorizeLEAsAllBasicBlocks(MachineFunction &MF);
534
535 bool FactorizeLEAsBasicBlock(MachineDomTreeNode *DN);
536
537 /// \brief Factor out LEAs which share Base,Index,Offset and Segment.
538 bool processBasicBlock(const MachineBasicBlock &MBB);
539
540 /// \brief Try to replace LEA with a lower strength instruction
541 /// to improves latency and throughput.
542 bool strengthReduceLEAs(MemOpMap &LEAs, const MachineBasicBlock &MBB);
543
Alexey Bataev28f0c5e2016-01-11 11:52:29 +0000544 DenseMap<const MachineInstr *, unsigned> InstrPos;
545
Jatin Bhateja328199e2017-12-01 14:07:38 +0000546 FactorizeLEAOpt FactorOpt;
547
548 MachineDominatorTree *DT;
Alexey Bataev7cf32472015-12-04 10:53:15 +0000549 MachineRegisterInfo *MRI;
550 const X86InstrInfo *TII;
551 const X86RegisterInfo *TRI;
552
553 static char ID;
554};
Eugene Zelenko60433b62017-10-05 00:33:50 +0000555
556} // end anonymous namespace
557
Alexey Bataev7cf32472015-12-04 10:53:15 +0000558char OptimizeLEAPass::ID = 0;
Alexey Bataev7cf32472015-12-04 10:53:15 +0000559
560FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); }
561
562int OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
563 const MachineInstr &Last) {
Alexey Bataev28f0c5e2016-01-11 11:52:29 +0000564 // Both instructions must be in the same basic block and they must be
565 // presented in InstrPos.
566 assert(Last.getParent() == First.getParent() &&
Alexey Bataev7cf32472015-12-04 10:53:15 +0000567 "Instructions are in different basic blocks");
Alexey Bataev28f0c5e2016-01-11 11:52:29 +0000568 assert(InstrPos.find(&First) != InstrPos.end() &&
569 InstrPos.find(&Last) != InstrPos.end() &&
570 "Instructions' positions are undefined");
Alexey Bataev7cf32472015-12-04 10:53:15 +0000571
Alexey Bataev28f0c5e2016-01-11 11:52:29 +0000572 return InstrPos[&Last] - InstrPos[&First];
Alexey Bataev7cf32472015-12-04 10:53:15 +0000573}
574
575// Find the best LEA instruction in the List to replace address recalculation in
576// MI. Such LEA must meet these requirements:
577// 1) The address calculated by the LEA differs only by the displacement from
578// the address used in MI.
579// 2) The register class of the definition of the LEA is compatible with the
580// register class of the address base register of MI.
581// 3) Displacement of the new memory operand should fit in 1 byte if possible.
582// 4) The LEA should be as close to MI as possible, and prior to it if
583// possible.
584bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List,
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000585 const MachineInstr &MI,
586 MachineInstr *&BestLEA,
Alexey Bataev7cf32472015-12-04 10:53:15 +0000587 int64_t &AddrDispShift, int &Dist) {
588 const MachineFunction *MF = MI.getParent()->getParent();
589 const MCInstrDesc &Desc = MI.getDesc();
Craig Topper477649a2016-04-28 05:58:46 +0000590 int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags) +
Alexey Bataev7cf32472015-12-04 10:53:15 +0000591 X86II::getOperandBias(Desc);
592
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000593 BestLEA = nullptr;
Alexey Bataev7cf32472015-12-04 10:53:15 +0000594
595 // Loop over all LEA instructions.
596 for (auto DefMI : List) {
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000597 // Get new address displacement.
598 int64_t AddrDispShiftTemp = getAddrDispShift(MI, MemOpNo, *DefMI, 1);
Alexey Bataev7cf32472015-12-04 10:53:15 +0000599
600 // Make sure address displacement fits 4 bytes.
601 if (!isInt<32>(AddrDispShiftTemp))
602 continue;
603
604 // Check that LEA def register can be used as MI address base. Some
605 // instructions can use a limited set of registers as address base, for
606 // example MOV8mr_NOREX. We could constrain the register class of the LEA
607 // def to suit MI, however since this case is very rare and hard to
608 // reproduce in a test it's just more reliable to skip the LEA.
609 if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI, *MF) !=
610 MRI->getRegClass(DefMI->getOperand(0).getReg()))
611 continue;
612
613 // Choose the closest LEA instruction from the list, prior to MI if
614 // possible. Note that we took into account resulting address displacement
615 // as well. Also note that the list is sorted by the order in which the LEAs
616 // occur, so the break condition is pretty simple.
617 int DistTemp = calcInstrDist(*DefMI, MI);
618 assert(DistTemp != 0 &&
619 "The distance between two different instructions cannot be zero");
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000620 if (DistTemp > 0 || BestLEA == nullptr) {
Alexey Bataev7cf32472015-12-04 10:53:15 +0000621 // Do not update return LEA, if the current one provides a displacement
622 // which fits in 1 byte, while the new candidate does not.
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000623 if (BestLEA != nullptr && !isInt<8>(AddrDispShiftTemp) &&
Alexey Bataev7cf32472015-12-04 10:53:15 +0000624 isInt<8>(AddrDispShift))
625 continue;
626
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000627 BestLEA = DefMI;
Alexey Bataev7cf32472015-12-04 10:53:15 +0000628 AddrDispShift = AddrDispShiftTemp;
629 Dist = DistTemp;
630 }
631
632 // FIXME: Maybe we should not always stop at the first LEA after MI.
633 if (DistTemp < 0)
634 break;
635 }
636
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000637 return BestLEA != nullptr;
Alexey Bataev7cf32472015-12-04 10:53:15 +0000638}
639
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000640// Get the difference between the addresses' displacements of the two
641// instructions \p MI1 and \p MI2. The numbers of the first memory operands are
642// passed through \p N1 and \p N2.
643int64_t OptimizeLEAPass::getAddrDispShift(const MachineInstr &MI1, unsigned N1,
644 const MachineInstr &MI2,
645 unsigned N2) const {
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000646 const MachineOperand &Op1 = MI1.getOperand(N1 + X86::AddrDisp);
647 const MachineOperand &Op2 = MI2.getOperand(N2 + X86::AddrDisp);
Andrey Turetskiy0babd262016-02-20 10:58:28 +0000648
649 assert(isSimilarDispOp(Op1, Op2) &&
650 "Address displacement operands are not compatible");
651
652 // After the assert above we can be sure that both operands are of the same
653 // valid type and use the same symbol/index/address, thus displacement shift
654 // calculation is rather simple.
655 if (Op1.isJTI())
656 return 0;
657 return Op1.isImm() ? Op1.getImm() - Op2.getImm()
658 : Op1.getOffset() - Op2.getOffset();
Alexey Bataev7cf32472015-12-04 10:53:15 +0000659}
660
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000661// Check that the Last LEA can be replaced by the First LEA. To be so,
662// these requirements must be met:
663// 1) Addresses calculated by LEAs differ only by displacement.
664// 2) Def registers of LEAs belong to the same class.
665// 3) All uses of the Last LEA def register are replaceable, thus the
666// register is used only as address base.
667bool OptimizeLEAPass::isReplaceable(const MachineInstr &First,
668 const MachineInstr &Last,
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000669 int64_t &AddrDispShift) const {
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000670 assert(isLEA(First) && isLEA(Last) &&
671 "The function works only with LEA instructions");
672
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000673 // Make sure that LEA def registers belong to the same class. There may be
674 // instructions (like MOV8mr_NOREX) which allow a limited set of registers to
675 // be used as their operands, so we must be sure that replacing one LEA
676 // with another won't lead to putting a wrong register in the instruction.
677 if (MRI->getRegClass(First.getOperand(0).getReg()) !=
678 MRI->getRegClass(Last.getOperand(0).getReg()))
679 return false;
680
Andrea Di Biagio7937be72017-03-21 11:36:21 +0000681 // Get new address displacement.
682 AddrDispShift = getAddrDispShift(Last, 1, First, 1);
683
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000684 // Loop over all uses of the Last LEA to check that its def register is
685 // used only as address base for memory accesses. If so, it can be
686 // replaced, otherwise - no.
Andrea Di Biagio7937be72017-03-21 11:36:21 +0000687 for (auto &MO : MRI->use_nodbg_operands(Last.getOperand(0).getReg())) {
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000688 MachineInstr &MI = *MO.getParent();
689
690 // Get the number of the first memory operand.
691 const MCInstrDesc &Desc = MI.getDesc();
Craig Topper477649a2016-04-28 05:58:46 +0000692 int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags);
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000693
694 // If the use instruction has no memory operand - the LEA is not
695 // replaceable.
696 if (MemOpNo < 0)
697 return false;
698
699 MemOpNo += X86II::getOperandBias(Desc);
700
701 // If the address base of the use instruction is not the LEA def register -
702 // the LEA is not replaceable.
703 if (!isIdenticalOp(MI.getOperand(MemOpNo + X86::AddrBaseReg), MO))
704 return false;
705
706 // If the LEA def register is used as any other operand of the use
707 // instruction - the LEA is not replaceable.
708 for (unsigned i = 0; i < MI.getNumOperands(); i++)
709 if (i != (unsigned)(MemOpNo + X86::AddrBaseReg) &&
710 isIdenticalOp(MI.getOperand(i), MO))
711 return false;
712
713 // Check that the new address displacement will fit 4 bytes.
714 if (MI.getOperand(MemOpNo + X86::AddrDisp).isImm() &&
715 !isInt<32>(MI.getOperand(MemOpNo + X86::AddrDisp).getImm() +
716 AddrDispShift))
717 return false;
718 }
719
720 return true;
721}
722
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000723void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs) {
Alexey Bataev28f0c5e2016-01-11 11:52:29 +0000724 unsigned Pos = 0;
Alexey Bataev7cf32472015-12-04 10:53:15 +0000725 for (auto &MI : MBB) {
Alexey Bataev28f0c5e2016-01-11 11:52:29 +0000726 // Assign the position number to the instruction. Note that we are going to
727 // move some instructions during the optimization however there will never
728 // be a need to move two instructions before any selected instruction. So to
729 // avoid multiple positions' updates during moves we just increase position
730 // counter by two leaving a free space for instructions which will be moved.
731 InstrPos[&MI] = Pos += 2;
732
Alexey Bataev7cf32472015-12-04 10:53:15 +0000733 if (isLEA(MI))
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000734 LEAs[getMemOpKey(MI, 1)].push_back(const_cast<MachineInstr *>(&MI));
Alexey Bataev7cf32472015-12-04 10:53:15 +0000735 }
736}
737
738// Try to find load and store instructions which recalculate addresses already
739// calculated by some LEA and replace their memory operands with its def
740// register.
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000741bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
Alexey Bataev7cf32472015-12-04 10:53:15 +0000742 bool Changed = false;
743
Jatin Bhateja328199e2017-12-01 14:07:38 +0000744 if (LEAs.empty())
745 return Changed;
746
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000747 MachineBasicBlock *MBB = (*LEAs.begin()->second.begin())->getParent();
Alexey Bataev7cf32472015-12-04 10:53:15 +0000748
749 // Process all instructions in basic block.
750 for (auto I = MBB->begin(), E = MBB->end(); I != E;) {
751 MachineInstr &MI = *I++;
Alexey Bataev7cf32472015-12-04 10:53:15 +0000752
753 // Instruction must be load or store.
754 if (!MI.mayLoadOrStore())
755 continue;
756
757 // Get the number of the first memory operand.
758 const MCInstrDesc &Desc = MI.getDesc();
Craig Topper477649a2016-04-28 05:58:46 +0000759 int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags);
Alexey Bataev7cf32472015-12-04 10:53:15 +0000760
761 // If instruction has no memory operand - skip it.
762 if (MemOpNo < 0)
763 continue;
764
765 MemOpNo += X86II::getOperandBias(Desc);
766
767 // Get the best LEA instruction to replace address calculation.
768 MachineInstr *DefMI;
769 int64_t AddrDispShift;
770 int Dist;
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000771 if (!chooseBestLEA(LEAs[getMemOpKey(MI, MemOpNo)], MI, DefMI, AddrDispShift,
772 Dist))
Alexey Bataev7cf32472015-12-04 10:53:15 +0000773 continue;
774
775 // If LEA occurs before current instruction, we can freely replace
776 // the instruction. If LEA occurs after, we can lift LEA above the
777 // instruction and this way to be able to replace it. Since LEA and the
778 // instruction have similar memory operands (thus, the same def
779 // instructions for these operands), we can always do that, without
780 // worries of using registers before their defs.
781 if (Dist < 0) {
782 DefMI->removeFromParent();
783 MBB->insert(MachineBasicBlock::iterator(&MI), DefMI);
Alexey Bataev28f0c5e2016-01-11 11:52:29 +0000784 InstrPos[DefMI] = InstrPos[&MI] - 1;
785
786 // Make sure the instructions' position numbers are sane.
Duncan P. N. Exon Smith7b4c18e2016-07-12 03:18:50 +0000787 assert(((InstrPos[DefMI] == 1 &&
788 MachineBasicBlock::iterator(DefMI) == MBB->begin()) ||
Alexey Bataev28f0c5e2016-01-11 11:52:29 +0000789 InstrPos[DefMI] >
Duncan P. N. Exon Smith7b4c18e2016-07-12 03:18:50 +0000790 InstrPos[&*std::prev(MachineBasicBlock::iterator(DefMI))]) &&
Alexey Bataev28f0c5e2016-01-11 11:52:29 +0000791 "Instruction positioning is broken");
Alexey Bataev7cf32472015-12-04 10:53:15 +0000792 }
793
794 // Since we can possibly extend register lifetime, clear kill flags.
795 MRI->clearKillFlags(DefMI->getOperand(0).getReg());
796
797 ++NumSubstLEAs;
798 DEBUG(dbgs() << "OptimizeLEAs: Candidate to replace: "; MI.dump(););
799
800 // Change instruction operands.
801 MI.getOperand(MemOpNo + X86::AddrBaseReg)
802 .ChangeToRegister(DefMI->getOperand(0).getReg(), false);
803 MI.getOperand(MemOpNo + X86::AddrScaleAmt).ChangeToImmediate(1);
804 MI.getOperand(MemOpNo + X86::AddrIndexReg)
805 .ChangeToRegister(X86::NoRegister, false);
806 MI.getOperand(MemOpNo + X86::AddrDisp).ChangeToImmediate(AddrDispShift);
807 MI.getOperand(MemOpNo + X86::AddrSegmentReg)
808 .ChangeToRegister(X86::NoRegister, false);
809
810 DEBUG(dbgs() << "OptimizeLEAs: Replaced by: "; MI.dump(););
811
812 Changed = true;
813 }
814
815 return Changed;
816}
817
Andrew Ng03e35b62017-04-28 08:44:30 +0000818MachineInstr *OptimizeLEAPass::replaceDebugValue(MachineInstr &MI,
819 unsigned VReg,
820 int64_t AddrDispShift) {
821 DIExpression *Expr = const_cast<DIExpression *>(MI.getDebugExpression());
822
Adrian Prantl109b2362017-04-28 17:51:05 +0000823 if (AddrDispShift != 0)
824 Expr = DIExpression::prepend(Expr, DIExpression::NoDeref, AddrDispShift,
825 DIExpression::WithStackValue);
Andrew Ng03e35b62017-04-28 08:44:30 +0000826
827 // Replace DBG_VALUE instruction with modified version.
828 MachineBasicBlock *MBB = MI.getParent();
829 DebugLoc DL = MI.getDebugLoc();
830 bool IsIndirect = MI.isIndirectDebugValue();
Andrew Ng03e35b62017-04-28 08:44:30 +0000831 const MDNode *Var = MI.getDebugVariable();
Adrian Prantl8b9bb532017-07-28 23:00:45 +0000832 if (IsIndirect)
833 assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
Andrew Ng03e35b62017-04-28 08:44:30 +0000834 return BuildMI(*MBB, MBB->erase(&MI), DL, TII->get(TargetOpcode::DBG_VALUE),
Adrian Prantl8b9bb532017-07-28 23:00:45 +0000835 IsIndirect, VReg, Var, Expr);
Andrew Ng03e35b62017-04-28 08:44:30 +0000836}
837
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000838// Try to find similar LEAs in the list and replace one with another.
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000839bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000840 bool Changed = false;
841
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000842 // Loop over all entries in the table.
843 for (auto &E : LEAs) {
844 auto &List = E.second;
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000845
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000846 // Loop over all LEA pairs.
847 auto I1 = List.begin();
848 while (I1 != List.end()) {
849 MachineInstr &First = **I1;
850 auto I2 = std::next(I1);
851 while (I2 != List.end()) {
852 MachineInstr &Last = **I2;
853 int64_t AddrDispShift;
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000854
Simon Pilgrim9d15fb32016-11-17 19:03:05 +0000855 // LEAs should be in occurrence order in the list, so we can freely
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000856 // replace later LEAs with earlier ones.
857 assert(calcInstrDist(First, Last) > 0 &&
Simon Pilgrim9d15fb32016-11-17 19:03:05 +0000858 "LEAs must be in occurrence order in the list");
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000859
860 // Check that the Last LEA instruction can be replaced by the First.
861 if (!isReplaceable(First, Last, AddrDispShift)) {
862 ++I2;
863 continue;
864 }
865
866 // Loop over all uses of the Last LEA and update their operands. Note
867 // that the correctness of this has already been checked in the
868 // isReplaceable function.
Andrew Ng03e35b62017-04-28 08:44:30 +0000869 unsigned FirstVReg = First.getOperand(0).getReg();
Andrea Di Biagio7937be72017-03-21 11:36:21 +0000870 unsigned LastVReg = Last.getOperand(0).getReg();
Andrew Ng03e35b62017-04-28 08:44:30 +0000871 for (auto UI = MRI->use_begin(LastVReg), UE = MRI->use_end();
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000872 UI != UE;) {
873 MachineOperand &MO = *UI++;
874 MachineInstr &MI = *MO.getParent();
875
Andrew Ng03e35b62017-04-28 08:44:30 +0000876 if (MI.isDebugValue()) {
877 // Replace DBG_VALUE instruction with modified version using the
878 // register from the replacing LEA and the address displacement
879 // between the LEA instructions.
880 replaceDebugValue(MI, FirstVReg, AddrDispShift);
881 continue;
882 }
883
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000884 // Get the number of the first memory operand.
885 const MCInstrDesc &Desc = MI.getDesc();
886 int MemOpNo =
Craig Topper477649a2016-04-28 05:58:46 +0000887 X86II::getMemoryOperandNo(Desc.TSFlags) +
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000888 X86II::getOperandBias(Desc);
889
890 // Update address base.
Andrew Ng03e35b62017-04-28 08:44:30 +0000891 MO.setReg(FirstVReg);
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000892
893 // Update address disp.
Andrey Turetskiy0babd262016-02-20 10:58:28 +0000894 MachineOperand &Op = MI.getOperand(MemOpNo + X86::AddrDisp);
895 if (Op.isImm())
896 Op.setImm(Op.getImm() + AddrDispShift);
897 else if (!Op.isJTI())
898 Op.setOffset(Op.getOffset() + AddrDispShift);
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000899 }
900
901 // Since we can possibly extend register lifetime, clear kill flags.
Andrew Ng03e35b62017-04-28 08:44:30 +0000902 MRI->clearKillFlags(FirstVReg);
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000903
904 ++NumRedundantLEAs;
905 DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: "; Last.dump(););
906
907 // By this moment, all of the Last LEA's uses must be replaced. So we
908 // can freely remove it.
Andrea Di Biagio7937be72017-03-21 11:36:21 +0000909 assert(MRI->use_empty(LastVReg) &&
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000910 "The LEA's def register must have no uses");
911 Last.eraseFromParent();
912
913 // Erase removed LEA from the list.
914 I2 = List.erase(I2);
915
Jatin Bhateja328199e2017-12-01 14:07:38 +0000916 // If List becomes empty remove it from LEAs map.
917 if (List.empty())
918 LEAs.erase(E.first);
919
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000920 Changed = true;
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000921 }
Andrey Turetskiybca0f992016-02-04 08:57:03 +0000922 ++I1;
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000923 }
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +0000924 }
925
926 return Changed;
927}
928
Jatin Bhateja328199e2017-12-01 14:07:38 +0000929static inline int getADDrrFromLEA(int LEAOpcode) {
930 switch (LEAOpcode) {
931 default:
932 llvm_unreachable("Unexpected LEA instruction");
933 case X86::LEA16r:
934 return X86::ADD16rr;
935 case X86::LEA32r:
936 return X86::ADD32rr;
937 case X86::LEA64_32r:
938 case X86::LEA64r:
939 return X86::ADD64rr;
940 }
941}
942
943bool OptimizeLEAPass::strengthReduceLEAs(MemOpMap &LEAs,
944 const MachineBasicBlock &BB) {
945 bool Changed = false;
946
947 // Loop over all entries in the table.
948 for (auto &E : LEAs) {
949 auto &List = E.second;
950
951 // Loop over all LEA pairs.
952 auto I1 = List.begin();
953 while (I1 != List.end()) {
954 MachineInstrBuilder NewMI;
955 MachineInstr &First = **I1;
956 MachineOperand &Res = First.getOperand(0);
957 MachineOperand &Base = First.getOperand(1);
958 MachineOperand &Scale = First.getOperand(2);
959 MachineOperand &Index = First.getOperand(3);
960 MachineOperand &Offset = First.getOperand(4);
961
962 const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(First.getOpcode()));
963 const DebugLoc DL = First.getDebugLoc();
964
965 if (!Base.isReg() || !Index.isReg() || !Index.getReg()) {
966 I1++;
967 continue;
968 }
969
970 if (TargetRegisterInfo::isPhysicalRegister(Res.getReg()) ||
971 TargetRegisterInfo::isPhysicalRegister(Base.getReg()) ||
972 TargetRegisterInfo::isPhysicalRegister(Index.getReg())) {
973 I1++;
974 continue;
975 }
976
977 // Check for register class compatibility between Result and
978 // Index operands.
979 const TargetRegisterClass *ResRC = MRI->getRegClass(Res.getReg());
980 const TargetRegisterClass *IndexRC = MRI->getRegClass(Index.getReg());
981 if (TRI->getRegSizeInBits(*ResRC) != TRI->getRegSizeInBits(*IndexRC)) {
982 I1++;
983 continue;
984 }
985
986 MachineBasicBlock &MBB = *(const_cast<MachineBasicBlock *>(&BB));
987 // R = B + I
988 if (Scale.isImm() && Scale.getImm() == 1 && Offset.isImm() &&
989 !Offset.getImm()) {
990 NewMI = BuildMI(MBB, &First, DL, ADDrr)
991 .addDef(Res.getReg())
992 .addUse(Base.getReg())
993 .addUse(Index.getReg());
994 Changed = NewMI.getInstr() != nullptr;
995 First.eraseFromParent();
996 I1 = List.erase(I1);
997
998 // If List becomes empty remove it from LEAs map.
999 if (List.empty())
1000 LEAs.erase(E.first);
1001 } else
1002 I1++;
1003 }
1004 }
1005 return Changed;
1006}
1007
1008bool OptimizeLEAPass::processBasicBlock(const MachineBasicBlock &MBB) {
1009 bool cseDone = false;
1010
1011 // Legal scale value (1,2,4 & 8) vector.
1012 auto LegalScale = [](int scale) {
1013 return scale == 1 || scale == 2 || scale == 4 || scale == 8;
1014 };
1015
1016 auto CompareFn = [](const MachineInstr *Arg1,
1017 const MachineInstr *Arg2) -> bool {
1018 return Arg1->getOperand(2).getImm() >= Arg2->getOperand(2).getImm();
1019 };
1020
1021 // Loop over all entries in the table.
1022 for (auto &E : FactorOpt.getLEAMap()) {
1023 auto &List = E.second;
1024 if (List.size() > 1)
1025 List.sort(CompareFn);
1026
1027 // Loop over all LEA pairs.
1028 for (auto Iter1 = List.begin(); Iter1 != List.end(); Iter1++) {
1029 for (auto Iter2 = std::next(Iter1); Iter2 != List.end(); Iter2++) {
1030 MachineInstr &LI1 = **Iter1;
1031 MachineInstr &LI2 = **Iter2;
1032
1033 if (!DT->dominates(&LI2, &LI1))
1034 continue;
1035
1036 int Scale1 = LI1.getOperand(2).getImm();
1037 int Scale2 = LI2.getOperand(2).getImm();
1038 assert(LI2.getOperand(0).isReg() && "Result is a VirtualReg");
1039 DebugLoc DL = LI1.getDebugLoc();
1040
1041 // Continue if instruction has already been factorized.
1042 if (FactorOpt.checkIfScheduledForRemoval(&LI1))
1043 continue;
1044
1045 int Factor = Scale1 - Scale2;
1046 if (Factor > 0 && LegalScale(Factor)) {
1047 MachineOperand NewBase = LI2.getOperand(0);
1048 MachineOperand NewIndex = LI1.getOperand(3);
1049
1050 const TargetRegisterClass *LI2ResRC =
1051 MRI->getRegClass(LI2.getOperand(0).getReg());
1052 const TargetRegisterClass *LI1BaseRC =
1053 MRI->getRegClass(LI1.getOperand(1).getReg());
1054
1055 if (TRI->getRegSizeInBits(*LI1BaseRC) >
1056 TRI->getRegSizeInBits(*LI2ResRC)) {
1057 MachineInstr *LI1IndexDef =
1058 MRI->getVRegDef(LI1.getOperand(3).getReg());
1059 if (LI1IndexDef->getOpcode() != TargetOpcode::SUBREG_TO_REG)
1060 continue;
1061 MachineOperand &SubReg = LI1IndexDef->getOperand(2);
1062 const TargetRegisterClass *SubRegRC =
1063 MRI->getRegClass(SubReg.getReg());
1064 if (TRI->getRegSizeInBits(*SubRegRC) !=
1065 TRI->getRegSizeInBits(*LI2ResRC))
1066 continue;
1067 NewIndex = SubReg;
1068 }
1069
1070 DEBUG(dbgs() << "CSE LEAs: Candidate to replace: "; LI1.dump(););
1071 MachineInstrBuilder NewMI =
1072 BuildMI(*(const_cast<MachineBasicBlock *>(&MBB)), &LI1, DL,
1073 TII->get(LI1.getOpcode()))
1074 .addDef(LI1.getOperand(0).getReg()) // Dst = Dst of LI1.
1075 .addUse(NewBase.getReg()) // Base = Dst to LI2.
1076 .addImm(Factor) // Scale = Diff b/w scales.
1077 .addUse(NewIndex.getReg()) // Index = Index of LI1.
1078 .addImm(0) // Disp = 0
1079 .addUse(
1080 LI1.getOperand(5).getReg()); // Segment = Segmant of LI1.
1081
1082 cseDone = NewMI.getInstr() != nullptr;
1083
1084 /// To preserve the SSA nature we need to remove Def flag
1085 /// from old result.
1086 LI1.getOperand(0).setIsDef(false);
1087
1088 /// Lazy removal shall ensure that replaced LEA remains
1089 /// till we finish processing all the basic block. This shall
1090 /// provide opportunity for further factorization based on
1091 /// the replaced LEA which will be legal since it has same
1092 /// destination as newly formed LEA.
1093 FactorOpt.addForLazyRemoval(&LI1);
1094
1095 NumFactoredLEAs++;
1096 DEBUG(dbgs() << "CSE LEAs: Replaced by: "; NewMI->dump(););
1097 }
1098 }
1099 }
1100 }
1101 return cseDone;
1102}
1103
1104bool OptimizeLEAPass::FactorizeLEAsBasicBlock(MachineDomTreeNode *DN) {
1105 bool Changed = false;
1106 using StackT = SmallVector<MachineDomTreeNode *, 16>;
1107 using VisitedNodeMapT = SmallSet<MachineDomTreeNode *, 16>;
1108
1109 StackT WorkList;
1110 VisitedNodeMapT DomNodeMap;
1111
1112 WorkList.push_back(DN);
1113 while (!WorkList.empty()) {
1114 MachineDomTreeNode *MDN = WorkList.back();
1115 FactorOpt.collectDataForBasicBlock(MDN->getBlock());
1116 Changed |= processBasicBlock(*MDN->getBlock());
1117
1118 if (DomNodeMap.find(MDN) == DomNodeMap.end()) {
1119 DomNodeMap.insert(MDN);
1120 for (auto Child : MDN->getChildren())
1121 WorkList.push_back(Child);
1122 }
1123
1124 MachineDomTreeNode *TDM = WorkList.back();
1125 if (MDN->getLevel() == TDM->getLevel()) {
1126 FactorOpt.removeDataForBasicBlock();
1127 DomNodeMap.erase(MDN);
1128 WorkList.pop_back();
1129 }
1130 }
1131 return Changed;
1132}
1133
1134bool OptimizeLEAPass::FactorizeLEAsAllBasicBlocks(MachineFunction &MF) {
1135 bool Changed = FactorizeLEAsBasicBlock(DT->getRootNode());
1136 FactorOpt.performCleanup();
1137 return Changed;
1138}
1139
Alexey Bataev7cf32472015-12-04 10:53:15 +00001140bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
1141 bool Changed = false;
Alexey Bataev7cf32472015-12-04 10:53:15 +00001142
Andrey Turetskiy45b22a42016-05-19 10:18:29 +00001143 if (DisableX86LEAOpt || skipFunction(*MF.getFunction()))
Alexey Bataev7cf32472015-12-04 10:53:15 +00001144 return false;
1145
1146 MRI = &MF.getRegInfo();
1147 TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
1148 TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
Jatin Bhateja328199e2017-12-01 14:07:38 +00001149 DT = &getAnalysis<MachineDominatorTree>();
1150
1151 // Attempt factorizing LEAs.
1152 Changed |= FactorizeLEAsAllBasicBlocks(MF);
Alexey Bataev7cf32472015-12-04 10:53:15 +00001153
1154 // Process all basic blocks.
1155 for (auto &MBB : MF) {
Andrey Turetskiybca0f992016-02-04 08:57:03 +00001156 MemOpMap LEAs;
Alexey Bataev28f0c5e2016-01-11 11:52:29 +00001157 InstrPos.clear();
Alexey Bataev7cf32472015-12-04 10:53:15 +00001158
1159 // Find all LEA instructions in basic block.
1160 findLEAs(MBB, LEAs);
1161
1162 // If current basic block has no LEAs, move on to the next one.
1163 if (LEAs.empty())
1164 continue;
1165
Andrey Turetskiy45b22a42016-05-19 10:18:29 +00001166 // Remove redundant LEA instructions.
1167 Changed |= removeRedundantLEAs(LEAs);
Andrey Turetskiy1ce2c992016-01-13 11:30:44 +00001168
Jatin Bhateja328199e2017-12-01 14:07:38 +00001169 // Strength reduce LEA instructions.
1170 Changed |= strengthReduceLEAs(LEAs, MBB);
1171
Andrey Turetskiy45b22a42016-05-19 10:18:29 +00001172 // Remove redundant address calculations. Do it only for -Os/-Oz since only
1173 // a code size gain is expected from this part of the pass.
1174 if (MF.getFunction()->optForSize())
1175 Changed |= removeRedundantAddrCalc(LEAs);
Alexey Bataev7cf32472015-12-04 10:53:15 +00001176 }
1177
1178 return Changed;
1179}