blob: 5027199e88bdca0bb7da5fda00492c40c7f5fe7d [file] [log] [blame]
Bill Wendlingca678352010-08-09 23:59:04 +00001//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Perform peephole optimizations on the machine code:
11//
12// - Optimize Extensions
13//
14// Optimization of sign / zero extension instructions. It may be extended to
15// handle other instructions with similar properties.
16//
17// On some targets, some instructions, e.g. X86 sign / zero extension, may
18// leave the source value in the lower part of the result. This optimization
19// will replace some uses of the pre-extension value with uses of the
20// sub-register of the results.
21//
22// - Optimize Comparisons
23//
24// Optimization of comparison instructions. For instance, in this code:
25//
26// sub r1, 1
27// cmp r1, 0
28// bz L1
29//
// If the "sub" instruction already sets (or could be modified to set) the
31// same flag that the "cmp" instruction sets and that "bz" uses, then we can
32// eliminate the "cmp" instruction.
Evan Chenge4b8ac92011-03-15 05:13:13 +000033//
Manman Rendc8ad002012-05-11 01:30:47 +000034// Another instance, in this code:
35//
36// sub r1, r3 | sub r1, imm
37// cmp r3, r1 or cmp r1, r3 | cmp r1, imm
38// bge L1
39//
40// If the branch instruction can use flag from "sub", then we can replace
41// "sub" with "subs" and eliminate the "cmp" instruction.
42//
Joel Jones24e440d2012-12-11 16:10:25 +000043// - Optimize Loads:
44//
45// Loads that can be folded into a later instruction. A load is foldable
46// if it loads to virtual registers and the virtual register defined has
47// a single use.
Quentin Colombetcf71c632013-09-13 18:26:31 +000048//
49// - Optimize Copies and Bitcast:
50//
51// Rewrite copies and bitcasts to avoid cross register bank copies
52// when possible.
53// E.g., Consider the following example, where capital and lower
54// letters denote different register file:
55// b = copy A <-- cross-bank copy
56// C = copy b <-- cross-bank copy
57// =>
58// b = copy A <-- cross-bank copy
59// C = copy A <-- same-bank copy
60//
61// E.g., for bitcast:
62// b = bitcast A <-- cross-bank copy
63// C = bitcast b <-- cross-bank copy
64// =>
65// b = bitcast A <-- cross-bank copy
66// C = copy A <-- same-bank copy
Bill Wendlingca678352010-08-09 23:59:04 +000067//===----------------------------------------------------------------------===//
68
Bill Wendlingca678352010-08-09 23:59:04 +000069#include "llvm/CodeGen/Passes.h"
Evan Cheng7f8ab6e2010-11-17 20:13:28 +000070#include "llvm/ADT/DenseMap.h"
Bill Wendlingca678352010-08-09 23:59:04 +000071#include "llvm/ADT/SmallPtrSet.h"
Evan Cheng7f8ab6e2010-11-17 20:13:28 +000072#include "llvm/ADT/SmallSet.h"
Bill Wendlingca678352010-08-09 23:59:04 +000073#include "llvm/ADT/Statistic.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000074#include "llvm/CodeGen/MachineDominators.h"
75#include "llvm/CodeGen/MachineInstrBuilder.h"
76#include "llvm/CodeGen/MachineRegisterInfo.h"
77#include "llvm/Support/CommandLine.h"
Craig Topper588ceec2012-12-17 03:56:00 +000078#include "llvm/Support/Debug.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000079#include "llvm/Target/TargetInstrInfo.h"
80#include "llvm/Target/TargetRegisterInfo.h"
Eric Christopherd9134482014-08-04 21:25:23 +000081#include "llvm/Target/TargetSubtargetInfo.h"
Bill Wendlingca678352010-08-09 23:59:04 +000082using namespace llvm;
83
Chandler Carruth1b9dde02014-04-22 02:02:50 +000084#define DEBUG_TYPE "peephole-opt"
85
// Optimize Extensions
//
// Hidden (developer-only) command-line knobs for this pass.
static cl::opt<bool>
Aggressive("aggressive-ext-opt", cl::Hidden,
           cl::desc("Aggressive extension optimization"));

static cl::opt<bool>
DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
                cl::desc("Disable the peephole optimizer"));

// Note: init(true) means the advanced (ValueTracker-based) copy optimization
// is OFF by default and must be explicitly enabled.
static cl::opt<bool>
DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(true),
                  cl::desc("Disable advanced copy optimization"));

// Pass statistics, reported with -stats.
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
STATISTIC(NumLoadFold, "Number of loads folded");
STATISTIC(NumSelects, "Number of selects optimized");
STATISTIC(NumCopiesBitcasts, "Number of copies/bitcasts optimized");
Bill Wendlingca678352010-08-09 23:59:04 +0000105
namespace {
  /// \brief Machine-code peephole optimization pass.
  /// Applies the local transformations described in the file header:
  /// extension result reuse, compare elimination, select optimization,
  /// load/immediate folding, and copy/bitcast rewriting.
  class PeepholeOptimizer : public MachineFunctionPass {
    const TargetMachine *TM;
    const TargetInstrInfo *TII;
    MachineRegisterInfo *MRI;
    MachineDominatorTree *DT;  // Machine dominator tree

  public:
    static char ID; // Pass identification
    PeepholeOptimizer() : MachineFunctionPass(ID) {
      initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
      // The dominator tree is only required (and preserved) when the
      // aggressive extension optimization is enabled.
      if (Aggressive) {
        AU.addRequired<MachineDominatorTree>();
        AU.addPreserved<MachineDominatorTree>();
      }
    }

  private:
    /// \brief Try to eliminate a comparison against a flag-setting
    /// predecessor (see file header).
    bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
    /// \brief Try to replace uses of a pre-extension value with the
    /// sub-register of the extension result.
    bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                          SmallPtrSetImpl<MachineInstr*> &LocalMIs);
    /// \brief Try to simplify a select instruction via the target hooks.
    bool optimizeSelect(MachineInstr *MI);
    /// \brief Try to rewrite a copy/bitcast to avoid a cross register bank
    /// copy.
    bool optimizeCopyOrBitcast(MachineInstr *MI);
    /// \brief Record \p MI when it moves an immediate into a virtual
    /// register.
    bool isMoveImmediate(MachineInstr *MI,
                         SmallSet<unsigned, 4> &ImmDefRegs,
                         DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
    /// \brief Try folding recorded move-immediates into \p MI's register
    /// operands.
    bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                       SmallSet<unsigned, 4> &ImmDefRegs,
                       DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
    /// \brief Record \p MI when it is a load that could be folded into a
    /// later user.
    bool isLoadFoldable(MachineInstr *MI,
                        SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
  };

  /// \brief Helper class to track the possible sources of a value defined by
  /// a (chain of) copy related instructions.
  /// Given a definition (instruction and definition index), this class
  /// follows the use-def chain to find successive suitable sources.
  /// The given source can be used to rewrite the definition into
  /// def = COPY src.
  ///
  /// For instance, let us consider the following snippet:
  /// v0 =
  /// v2 = INSERT_SUBREG v1, v0, sub0
  /// def = COPY v2.sub0
  ///
  /// Using a ValueTracker for def = COPY v2.sub0 will give the following
  /// suitable sources:
  /// v2.sub0 and v0.
  /// Then, def can be rewritten into def = COPY v0.
  class ValueTracker {
  private:
    /// The current point into the use-def chain.
    const MachineInstr *Def;
    /// The index of the definition in Def.
    unsigned DefIdx;
    /// The sub register index of the definition.
    unsigned DefSubReg;
    /// The register where the value can be found.
    unsigned Reg;
    /// Specify whether or not the value tracking looks through
    /// complex instructions. When this is false, the value tracker
    /// bails on everything that is not a copy or a bitcast.
    ///
    /// Note: This could have been implemented as a specialized version of
    /// the ValueTracker class but that would have complicated the code of
    /// the users of this class.
    bool UseAdvancedTracking;
    /// Optional MachineRegisterInfo used to perform some complex
    /// tracking.
    const MachineRegisterInfo *MRI;

    /// \brief Dispatcher to the right underlying implementation of
    /// getNextSource.
    bool getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg);
    /// \brief Specialized version of getNextSource for Copy instructions.
    bool getNextSourceFromCopy(unsigned &SrcIdx, unsigned &SrcSubReg);
    /// \brief Specialized version of getNextSource for Bitcast instructions.
    bool getNextSourceFromBitcast(unsigned &SrcIdx, unsigned &SrcSubReg);
    /// \brief Specialized version of getNextSource for RegSequence
    /// instructions.
    bool getNextSourceFromRegSequence(unsigned &SrcIdx, unsigned &SrcSubReg);
    /// \brief Specialized version of getNextSource for InsertSubreg
    /// instructions.
    bool getNextSourceFromInsertSubreg(unsigned &SrcIdx, unsigned &SrcSubReg);
    /// \brief Specialized version of getNextSource for ExtractSubreg
    /// instructions.
    bool getNextSourceFromExtractSubreg(unsigned &SrcIdx, unsigned &SrcSubReg);
    /// \brief Specialized version of getNextSource for SubregToReg
    /// instructions.
    bool getNextSourceFromSubregToReg(unsigned &SrcIdx, unsigned &SrcSubReg);

  public:
    /// \brief Create a ValueTracker instance for the value defines by \p MI
    /// at the operand index \p DefIdx.
    /// \p DefSubReg represents the sub register index the value tracker will
    /// track. It does not need to match the sub register index used in \p MI.
    /// \p UseAdvancedTracking specifies whether or not the value tracker looks
    /// through complex instructions. By default (false), it handles only copy
    /// and bitcast instructions.
    /// \p MRI useful to perform some complex checks.
    ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg,
                 bool UseAdvancedTracking = false,
                 const MachineRegisterInfo *MRI = nullptr)
        : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg),
          UseAdvancedTracking(UseAdvancedTracking), MRI(MRI) {
      assert(Def->getOperand(DefIdx).isDef() &&
             Def->getOperand(DefIdx).isReg() &&
             "Definition does not match machine instruction");
      // Initially the value is in the defined register.
      Reg = Def->getOperand(DefIdx).getReg();
    }

    /// \brief Following the use-def chain, get the next available source
    /// for the tracked value.
    /// When the returned value is not nullptr, getReg() gives the register
    /// that contain the tracked value.
    /// \note The sub register index returned in \p SrcSubReg must be used
    /// on that getReg() to access the actual value.
    /// \return Unless the returned value is nullptr (i.e., no source found),
    /// \p SrcIdx gives the index of the next source in the returned
    /// instruction and \p SrcSubReg the index to be used on that source to
    /// get the tracked value. When nullptr is returned, no alternative source
    /// has been found.
    const MachineInstr *getNextSource(unsigned &SrcIdx, unsigned &SrcSubReg);

    /// \brief Get the last register where the initial value can be found.
    /// Initially this is the register of the definition.
    /// Then, after each successful call to getNextSource, this is the
    /// register of the last source.
    unsigned getReg() const { return Reg; }
  };
}
245
// Pass identification: the address of ID uniquely identifies the pass.
char PeepholeOptimizer::ID = 0;
char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
// Register the pass and declare its (conditional) dependency on the
// machine dominator tree analysis.
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
                      "Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
                    "Peephole Optimizations", false, false)
Bill Wendlingca678352010-08-09 23:59:04 +0000253
/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of
/// the result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this
/// changes the code. Since this code does not currently share EXTRACTs, just
/// ignore all debug uses.
///
/// Returns true if any use was rewritten.
bool PeepholeOptimizer::
optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                 SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
  // Ask the target whether MI is an extension whose source survives as a
  // sub-register (SubIdx) of DstReg.
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    return false;

  // Only virtual registers can be rewritten safely here.
  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  if (MRI->hasOneNonDBGUse(SrcReg))
    // No other uses.
    return false;

  // Ensure DstReg can get a register class that actually supports
  // sub-registers. Don't change the class until we commit.
  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
  DstRC = TM->getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
      DstRC, SubIdx);
  if (!DstRC)
    return false;

  // The ext instr may be operating on a sub-register of SrcReg as well.
  // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
  // register.
  // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
  // SrcReg:SubIdx should be replaced.
  bool UseSrcSubIdx =
      TM->getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
          MRI->getRegClass(SrcReg), SubIdx) != nullptr;

  // The source has other uses. See if we can replace the other uses with use
  // of the result of the extension.
  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
    ReachedBBs.insert(UI.getParent());

  // Uses that are in the same BB of uses of the result of the instruction.
  SmallVector<MachineOperand*, 8> Uses;

  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand*, 8> ExtendedUses;

  // Classify every non-debug use of SrcReg into one of the buckets above,
  // or decide that the result's live range must not be extended at all.
  bool ExtendLife = true;
  for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
    MachineInstr *UseMI = UseMO.getParent();
    if (UseMI == MI)
      continue;

    // A PHI use is expected to kill its source; do not extend the result's
    // live range into it.
    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // Only accept uses of SrcReg:SubIdx.
    if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
      continue;

    // It's an error to translate this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    //
    // into this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1027 = COPY %reg1025:4
    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    //
    // The problem here is that SUBREG_TO_REG is there to assert that an
    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    // the COPY here, it will give us the value after the <sext>, not the
    // original value of %reg1024 before <sext>.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      continue;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in order
      // to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend live range
      // of the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
              std::back_inserter(Uses));

  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;

    // Look for PHI uses of the extended result, we don't want to extend the
    // liveness of a PHI input. It breaks all kinds of assumptions down
    // stream. A PHI use is expected to be the kill of its source values.
    for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
      if (UI.isPHI())
        PHIBBs.insert(UI.getParent());

    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
      MachineOperand *UseMO = Uses[i];
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      // Skip uses in blocks that contain a PHI reading DstReg (see above).
      if (PHIBBs.count(UseMBB))
        continue;

      // About to add uses of DstReg, clear DstReg's kill flags.
      if (!Changed) {
        MRI->clearKillFlags(DstReg);
        // Commit the register class change decided earlier.
        MRI->constrainRegClass(DstReg, DstRC);
      }

      // Replace the use with a copy of DstReg:SubIdx into a fresh register
      // of SrcReg's class.
      unsigned NewVR = MRI->createVirtualRegister(RC);
      MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
                                   TII->get(TargetOpcode::COPY), NewVR)
        .addReg(DstReg, 0, SubIdx);
      // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
      if (UseSrcSubIdx) {
        Copy->getOperand(0).setSubReg(SubIdx);
        Copy->getOperand(0).setIsUndef();
      }
      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
411
Jim Grosbachedcb8682012-05-01 23:21:41 +0000412/// optimizeCmpInstr - If the instruction is a compare and the previous
Bill Wendlingca678352010-08-09 23:59:04 +0000413/// instruction it's comparing against all ready sets (or could be modified to
414/// set) the same flag as the compare, then we can remove the comparison and use
415/// the flag from the previous instruction.
Jim Grosbachedcb8682012-05-01 23:21:41 +0000416bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
Evan Chenge4b8ac92011-03-15 05:13:13 +0000417 MachineBasicBlock *MBB) {
Bill Wendlingca678352010-08-09 23:59:04 +0000418 // If this instruction is a comparison against zero and isn't comparing a
419 // physical register, we can try to optimize it.
Manman Ren6fa76dc2012-06-29 21:33:59 +0000420 unsigned SrcReg, SrcReg2;
Gabor Greifadbbb932010-09-21 12:01:15 +0000421 int CmpMask, CmpValue;
Manman Ren6fa76dc2012-06-29 21:33:59 +0000422 if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
423 TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
424 (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
Bill Wendlingca678352010-08-09 23:59:04 +0000425 return false;
426
Bill Wendling27dddd12010-09-11 00:13:50 +0000427 // Attempt to optimize the comparison instruction.
Manman Ren6fa76dc2012-06-29 21:33:59 +0000428 if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
Evan Chenge4b8ac92011-03-15 05:13:13 +0000429 ++NumCmps;
Bill Wendlingca678352010-08-09 23:59:04 +0000430 return true;
431 }
432
433 return false;
434}
435
Jakob Stoklund Olesen2382d322012-08-16 23:11:47 +0000436/// Optimize a select instruction.
437bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
438 unsigned TrueOp = 0;
439 unsigned FalseOp = 0;
440 bool Optimizable = false;
441 SmallVector<MachineOperand, 4> Cond;
442 if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
443 return false;
444 if (!Optimizable)
445 return false;
446 if (!TII->optimizeSelect(MI))
447 return false;
448 MI->eraseFromParent();
449 ++NumSelects;
450 return true;
451}
452
Quentin Colombetcf71c632013-09-13 18:26:31 +0000453/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
454/// share the same register file.
455static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
456 const TargetRegisterClass *DefRC,
457 unsigned DefSubReg,
458 const TargetRegisterClass *SrcRC,
459 unsigned SrcSubReg) {
460 // Same register class.
461 if (DefRC == SrcRC)
462 return true;
463
464 // Both operands are sub registers. Check if they share a register class.
465 unsigned SrcIdx, DefIdx;
466 if (SrcSubReg && DefSubReg)
467 return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
Craig Topperc0196b12014-04-14 00:51:57 +0000468 SrcIdx, DefIdx) != nullptr;
Quentin Colombetcf71c632013-09-13 18:26:31 +0000469 // At most one of the register is a sub register, make it Src to avoid
470 // duplicating the test.
471 if (!SrcSubReg) {
472 std::swap(DefSubReg, SrcSubReg);
473 std::swap(DefRC, SrcRC);
474 }
475
476 // One of the register is a sub register, check if we can get a superclass.
477 if (SrcSubReg)
Craig Topperc0196b12014-04-14 00:51:57 +0000478 return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;
Quentin Colombetcf71c632013-09-13 18:26:31 +0000479 // Plain copy.
Craig Topperc0196b12014-04-14 00:51:57 +0000480 return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
Quentin Colombetcf71c632013-09-13 18:26:31 +0000481}
482
483/// \brief Get the index of the definition and source for \p Copy
484/// instruction.
485/// \pre Copy.isCopy() or Copy.isBitcast().
486/// \return True if the Copy instruction has only one register source
487/// and one register definition. Otherwise, \p DefIdx and \p SrcIdx
488/// are invalid.
489static bool getCopyOrBitcastDefUseIdx(const MachineInstr &Copy,
490 unsigned &DefIdx, unsigned &SrcIdx) {
491 assert((Copy.isCopy() || Copy.isBitcast()) && "Wrong operation type.");
492 if (Copy.isCopy()) {
493 // Copy instruction are supposed to be: Def = Src.
494 if (Copy.getDesc().getNumOperands() != 2)
495 return false;
496 DefIdx = 0;
497 SrcIdx = 1;
498 assert(Copy.getOperand(DefIdx).isDef() && "Use comes before def!");
499 return true;
500 }
501 // Bitcast case.
502 // Bitcasts with more than one def are not supported.
503 if (Copy.getDesc().getNumDefs() != 1)
504 return false;
505 // Initialize SrcIdx to an undefined operand.
506 SrcIdx = Copy.getDesc().getNumOperands();
507 for (unsigned OpIdx = 0, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; ++OpIdx) {
508 const MachineOperand &MO = Copy.getOperand(OpIdx);
509 if (!MO.isReg() || !MO.getReg())
510 continue;
511 if (MO.isDef())
512 DefIdx = OpIdx;
513 else if (SrcIdx != EndOpIdx)
514 // Multiple sources?
515 return false;
516 SrcIdx = OpIdx;
517 }
518 return true;
519}
520
/// \brief Optimize a copy or bitcast instruction to avoid cross
/// register bank copy. The optimization looks through a chain of
/// copies and try to find a source that has a compatible register
/// class.
/// Two register classes are considered to be compatible if they share
/// the same register bank.
/// New copies issued by this optimization are register allocator
/// friendly. This optimization does not remove any copy as it may
/// over-constrain the register allocator, but replaces some when
/// possible.
/// \pre \p MI is a Copy (MI->isCopy() is true)
/// \return True, when \p MI has been optimized. In that case, \p MI has
/// been removed from its parent.
bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
  unsigned DefIdx, SrcIdx;
  if (!MI || !getCopyOrBitcastDefUseIdx(*MI, DefIdx, SrcIdx))
    return false;

  const MachineOperand &MODef = MI->getOperand(DefIdx);
  assert(MODef.isReg() && "Copies must be between registers.");
  unsigned Def = MODef.getReg();

  // Do not rewrite definitions of physical registers.
  if (TargetRegisterInfo::isPhysicalRegister(Def))
    return false;

  const TargetRegisterClass *DefRC = MRI->getRegClass(Def);
  unsigned DefSubReg = MODef.getSubReg();

  // Src/SrcSubReg are only meaningful once ShouldRewrite is true; the
  // short-circuit below never reads them otherwise.
  unsigned Src;
  unsigned SrcSubReg;
  bool ShouldRewrite = false;
  const TargetRegisterInfo &TRI = *TM->getSubtargetImpl()->getRegisterInfo();

  // Follow the chain of copies until we reach the top of the use-def chain
  // or find a more suitable source.
  ValueTracker ValTracker(*MI, DefIdx, DefSubReg, !DisableAdvCopyOpt, MRI);
  do {
    unsigned CopySrcIdx, CopySrcSubReg;
    if (!ValTracker.getNextSource(CopySrcIdx, CopySrcSubReg))
      break;
    Src = ValTracker.getReg();
    SrcSubReg = CopySrcSubReg;

    // Do not extend the live-ranges of physical registers as they add
    // constraints to the register allocator.
    // Moreover, if we want to extend the live-range of a physical register,
    // unlike SSA virtual register, we will have to check that they are not
    // redefine before the related use.
    if (TargetRegisterInfo::isPhysicalRegister(Src))
      break;

    const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);

    // If this source does not incur a cross register bank copy, use it.
    ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC,
                                          SrcSubReg);
  } while (!ShouldRewrite);

  // If we did not find a more suitable source, there is nothing to optimize.
  if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg())
    return false;

  // Rewrite the copy to avoid a cross register bank penalty.
  // Note: Def is known virtual here (physical defs bailed out above).
  unsigned NewVR = TargetRegisterInfo::isPhysicalRegister(Def) ? Def :
    MRI->createVirtualRegister(DefRC);
  MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
                                  TII->get(TargetOpcode::COPY), NewVR)
    .addReg(Src, 0, SrcSubReg);
  NewCopy->getOperand(0).setSubReg(DefSubReg);

  MRI->replaceRegWith(Def, NewVR);
  MRI->clearKillFlags(NewVR);
  // We extended the lifetime of Src.
  // Clear the kill flags to account for that.
  MRI->clearKillFlags(Src);
  MI->eraseFromParent();
  ++NumCopiesBitcasts;
  return true;
}
600
Manman Ren5759d012012-08-02 00:56:42 +0000601/// isLoadFoldable - Check whether MI is a candidate for folding into a later
602/// instruction. We only fold loads to virtual registers and the virtual
603/// register defined has a single use.
Lang Hames5dc14bd2014-04-02 22:59:58 +0000604bool PeepholeOptimizer::isLoadFoldable(
605 MachineInstr *MI,
606 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
Manman Renba8122c2012-08-02 19:37:32 +0000607 if (!MI->canFoldAsLoad() || !MI->mayLoad())
608 return false;
609 const MCInstrDesc &MCID = MI->getDesc();
610 if (MCID.getNumDefs() != 1)
611 return false;
612
613 unsigned Reg = MI->getOperand(0).getReg();
Ekaterina Romanova8d620082014-03-13 18:47:12 +0000614 // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
Manman Renba8122c2012-08-02 19:37:32 +0000615 // loads. It should be checked when processing uses of the load, since
616 // uses can be removed during peephole.
617 if (!MI->getOperand(0).getSubReg() &&
618 TargetRegisterInfo::isVirtualRegister(Reg) &&
Ekaterina Romanova8d620082014-03-13 18:47:12 +0000619 MRI->hasOneNonDBGUse(Reg)) {
Lang Hames5dc14bd2014-04-02 22:59:58 +0000620 FoldAsLoadDefCandidates.insert(Reg);
Manman Renba8122c2012-08-02 19:37:32 +0000621 return true;
Manman Ren5759d012012-08-02 00:56:42 +0000622 }
623 return false;
624}
625
Evan Cheng7f8ab6e2010-11-17 20:13:28 +0000626bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
627 SmallSet<unsigned, 4> &ImmDefRegs,
628 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
Evan Cheng6cc775f2011-06-28 19:10:37 +0000629 const MCInstrDesc &MCID = MI->getDesc();
Evan Cheng7f8e5632011-12-07 07:15:52 +0000630 if (!MI->isMoveImmediate())
Evan Cheng7f8ab6e2010-11-17 20:13:28 +0000631 return false;
Evan Cheng6cc775f2011-06-28 19:10:37 +0000632 if (MCID.getNumDefs() != 1)
Evan Cheng7f8ab6e2010-11-17 20:13:28 +0000633 return false;
634 unsigned Reg = MI->getOperand(0).getReg();
635 if (TargetRegisterInfo::isVirtualRegister(Reg)) {
636 ImmDefMIs.insert(std::make_pair(Reg, MI));
637 ImmDefRegs.insert(Reg);
638 return true;
639 }
Andrew Trick9e761992012-02-08 21:22:43 +0000640
Evan Cheng7f8ab6e2010-11-17 20:13:28 +0000641 return false;
642}
643
Jim Grosbachedcb8682012-05-01 23:21:41 +0000644/// foldImmediate - Try folding register operands that are defined by move
Evan Cheng7f8ab6e2010-11-17 20:13:28 +0000645/// immediate instructions, i.e. a trivial constant folding optimization, if
646/// and only if the def and use are in the same BB.
Jim Grosbachedcb8682012-05-01 23:21:41 +0000647bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
Evan Cheng7f8ab6e2010-11-17 20:13:28 +0000648 SmallSet<unsigned, 4> &ImmDefRegs,
649 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
650 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
651 MachineOperand &MO = MI->getOperand(i);
652 if (!MO.isReg() || MO.isDef())
653 continue;
654 unsigned Reg = MO.getReg();
Jakob Stoklund Olesen2fb5b312011-01-10 02:58:51 +0000655 if (!TargetRegisterInfo::isVirtualRegister(Reg))
Evan Cheng7f8ab6e2010-11-17 20:13:28 +0000656 continue;
657 if (ImmDefRegs.count(Reg) == 0)
658 continue;
659 DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
660 assert(II != ImmDefMIs.end());
661 if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
662 ++NumImmFold;
663 return true;
664 }
665 }
666 return false;
667}
668
/// runOnMachineFunction - Entry point of the pass. Walks every instruction
/// of every block once, applying (in order): copy/bitcast rewriting,
/// compare elimination, select optimization, extension optimization,
/// move-immediate folding, and load folding. Returns true if the function
/// was modified. All bookkeeping sets are per-basic-block.
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (skipOptnoneFunction(*MF.getFunction()))
    return false;

  DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
  DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');

  if (DisablePeephole)
    return false;

  // Cache target hooks for the helper methods.
  TM = &MF.getTarget();
  TII = TM->getSubtargetImpl()->getInstrInfo();
  MRI = &MF.getRegInfo();
  // The dominator tree is only needed (and only computed) in aggressive mode.
  DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;

  bool Changed = false;

  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    // Per-block state: instructions seen so far, move-immediate defs, and
    // loads that may still be folded into a later user.
    bool SeenMoveImm = false;
    SmallPtrSet<MachineInstr*, 16> LocalMIs;
    SmallSet<unsigned, 4> ImmDefRegs;
    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    SmallSet<unsigned, 16> FoldAsLoadDefCandidates;

    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      // We may be erasing MI below, increment MII now.
      ++MII;
      LocalMIs.insert(MI);

      // Skip debug values. They should not affect this peephole optimization.
      if (MI->isDebugValue())
          continue;

      // If there exists an instruction which belongs to the following
      // categories, we will discard the load candidates.
      if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() ||
          MI->hasUnmodeledSideEffects()) {
        FoldAsLoadDefCandidates.clear();
        continue;
      }
      // Stores and calls may clobber memory a candidate load reads from, so
      // folding the load past them would be unsafe.
      if (MI->mayStore() || MI->isCall())
        FoldAsLoadDefCandidates.clear();

      if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||
          (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
          (MI->isSelect() && optimizeSelect(MI))) {
        // MI is deleted.
        LocalMIs.erase(MI);
        Changed = true;
        continue;
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        // optimizeExtInstr might have created new instructions after MI
        // and before the already incremented MII. Adjust MII so that the
        // next iteration sees the new instructions.
        MII = MI;
        ++MII;
        // Only attempt immediate folding once at least one move-immediate
        // has been recorded in this block.
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      // Check whether MI is a load candidate for folding into a later
      // instruction. If MI is not a candidate, check whether we can fold an
      // earlier load into MI.
      if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
          !FoldAsLoadDefCandidates.empty()) {
        const MCInstrDesc &MIDesc = MI->getDesc();
        // Scan only the use operands (defs come first in the operand list).
        for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
             ++i) {
          const MachineOperand &MOp = MI->getOperand(i);
          if (!MOp.isReg())
            continue;
          unsigned FoldAsLoadDefReg = MOp.getReg();
          if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
            // We need to fold load after optimizeCmpInstr, since
            // optimizeCmpInstr can enable folding by converting SUB to CMP.
            // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
            // we need it for markUsesInDebugValueAsUndef().
            unsigned FoldedReg = FoldAsLoadDefReg;
            MachineInstr *DefMI = nullptr;
            MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
                                                          FoldAsLoadDefReg,
                                                          DefMI);
            if (FoldMI) {
              // Update LocalMIs since we replaced MI with FoldMI and deleted
              // DefMI.
              DEBUG(dbgs() << "Replacing: " << *MI);
              DEBUG(dbgs() << " With: " << *FoldMI);
              LocalMIs.erase(MI);
              LocalMIs.erase(DefMI);
              LocalMIs.insert(FoldMI);
              MI->eraseFromParent();
              DefMI->eraseFromParent();
              // Any DBG_VALUE referring to the folded load's def is now stale.
              MRI->markUsesInDebugValueAsUndef(FoldedReg);
              FoldAsLoadDefCandidates.erase(FoldedReg);
              ++NumLoadFold;
              // MI is replaced with FoldMI.
              Changed = true;
              break;
            }
          }
        }
      }
    }
  }

  return Changed;
}
Quentin Colombet1111e6f2014-07-01 14:33:36 +0000786
787bool ValueTracker::getNextSourceFromCopy(unsigned &SrcIdx,
788 unsigned &SrcSubReg) {
789 assert(Def->isCopy() && "Invalid definition");
790 // Copy instruction are supposed to be: Def = Src.
791 // If someone breaks this assumption, bad things will happen everywhere.
792 assert(Def->getDesc().getNumOperands() == 2 && "Invalid number of operands");
793
794 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
795 // If we look for a different subreg, it means we want a subreg of src.
796 // Bails as we do not support composing subreg yet.
797 return false;
798 // Otherwise, we want the whole source.
799 SrcIdx = 1;
800 SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
801 return true;
802}
803
804bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
805 unsigned &SrcSubReg) {
806 assert(Def->isBitcast() && "Invalid definition");
807
808 // Bail if there are effects that a plain copy will not expose.
809 if (Def->hasUnmodeledSideEffects())
810 return false;
811
812 // Bitcasts with more than one def are not supported.
813 if (Def->getDesc().getNumDefs() != 1)
814 return false;
815 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
816 // If we look for a different subreg, it means we want a subreg of the src.
817 // Bails as we do not support composing subreg yet.
818 return false;
819
820 SrcIdx = Def->getDesc().getNumOperands();
821 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
822 ++OpIdx) {
823 const MachineOperand &MO = Def->getOperand(OpIdx);
824 if (!MO.isReg() || !MO.getReg())
825 continue;
826 assert(!MO.isDef() && "We should have skipped all the definitions by now");
827 if (SrcIdx != EndOpIdx)
828 // Multiple sources?
829 return false;
830 SrcIdx = OpIdx;
831 }
832 SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
833 return true;
834}
835
836bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
837 unsigned &SrcSubReg) {
838 assert(Def->isRegSequence() && "Invalid definition");
839
840 if (Def->getOperand(DefIdx).getSubReg())
841 // If we are composing subreg, bails out.
842 // The case we are checking is Def.<subreg> = REG_SEQUENCE.
843 // This should almost never happen as the SSA property is tracked at
844 // the register level (as opposed to the subreg level).
845 // I.e.,
846 // Def.sub0 =
847 // Def.sub1 =
848 // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
849 // Def. Thus, it must not be generated.
Quentin Colombet6d590d52014-07-01 16:23:44 +0000850 // However, some code could theoretically generates a single
Quentin Colombet1111e6f2014-07-01 14:33:36 +0000851 // Def.sub0 (i.e, not defining the other subregs) and we would
852 // have this case.
853 // If we can ascertain (or force) that this never happens, we could
854 // turn that into an assertion.
855 return false;
856
857 // We are looking at:
858 // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
859 // Check if one of the operand defines the subreg we are interested in.
860 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = Def->getNumOperands();
861 OpIdx != EndOpIdx; OpIdx += 2) {
862 const MachineOperand &MOSubIdx = Def->getOperand(OpIdx + 1);
863 assert(MOSubIdx.isImm() &&
864 "One of the subindex of the reg_sequence is not an immediate");
865 if (MOSubIdx.getImm() == DefSubReg) {
866 assert(Def->getOperand(OpIdx).isReg() &&
867 "One of the source of the reg_sequence is not a register");
868 SrcIdx = OpIdx;
869 SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
870 return true;
871 }
872 }
873
874 // If the subreg we are tracking is super-defined by another subreg,
875 // we could follow this value. However, this would require to compose
876 // the subreg and we do not do that for now.
877 return false;
878}
879
/// getNextSourceFromInsertSubreg - Follow the tracked subreg through an
/// INSERT_SUBREG definition (Def = INSERT_SUBREG v0, v1, sub1). On
/// success, \p SrcIdx and \p SrcSubReg identify the source operand.
bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcIdx,
                                                 unsigned &SrcSubReg) {
  assert(Def->isInsertSubreg() && "Invalid definition");
  if (Def->getOperand(DefIdx).getSubReg())
    // If we are composing subreg, bails out.
    // Same remark as getNextSourceFromRegSequence.
    // I.e., this may be turned into an assert.
    return false;

  // We are looking at:
  // Def = INSERT_SUBREG v0, v1, sub1
  // There are two cases:
  // 1. DefSubReg == sub1, get v1.
  // 2. DefSubReg != sub1, the value may be available through v0.

  // #1 Check if the inserted register matches the require sub index.
  // NOTE: operand 3 of INSERT_SUBREG is the subreg index immediate.
  unsigned InsertedSubReg = Def->getOperand(3).getImm();
  if (InsertedSubReg == DefSubReg) {
    SrcIdx = 2;
    SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
    return true;
  }
  // #2 Otherwise, if the sub register we are looking for is not partial
  // defined by the inserted element, we can look through the main
  // register (v0).
  // To check the overlapping we need a MRI and a TRI.
  if (!MRI)
    return false;

  const MachineOperand &MODef = Def->getOperand(DefIdx);
  const MachineOperand &MOBase = Def->getOperand(1);
  // If the result register (Def) and the base register (v0) do not
  // have the same register class or if we have to compose
  // subregisters, bails out.
  if (MRI->getRegClass(MODef.getReg()) != MRI->getRegClass(MOBase.getReg()) ||
      MOBase.getSubReg())
    return false;

  // Get the TRI and check if inserted sub register overlaps with the
  // sub register we are tracking.
  // A non-zero lane-mask intersection means the inserted value clobbers
  // part of the bits we track, so v0 alone does not carry the value.
  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
  if (!TRI ||
      (TRI->getSubRegIndexLaneMask(DefSubReg) &
       TRI->getSubRegIndexLaneMask(InsertedSubReg)) != 0)
    return false;
  // At this point, the value is available in v0 via the same subreg
  // we used for Def.
  SrcIdx = 1;
  SrcSubReg = DefSubReg;
  return true;
}
931
932bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx,
933 unsigned &SrcSubReg) {
934 assert(Def->isExtractSubreg() && "Invalid definition");
935 // We are looking at:
936 // Def = EXTRACT_SUBREG v0, sub0
937
938 // Bails if we have to compose sub registers.
939 // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
940 if (DefSubReg)
941 return false;
942
943 // Bails if we have to compose sub registers.
944 // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
945 if (Def->getOperand(1).getSubReg())
946 return false;
947 // Otherwise, the value is available in the v0.sub0.
948 SrcIdx = 1;
949 SrcSubReg = Def->getOperand(2).getImm();
950 return true;
951}
952
953bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx,
954 unsigned &SrcSubReg) {
955 assert(Def->isSubregToReg() && "Invalid definition");
956 // We are looking at:
957 // Def = SUBREG_TO_REG Imm, v0, sub0
958
959 // Bails if we have to compose sub registers.
960 // If DefSubReg != sub0, we would have to check that all the bits
961 // we track are included in sub0 and if yes, we would have to
962 // determine the right subreg in v0.
963 if (DefSubReg != Def->getOperand(3).getImm())
964 return false;
965 // Bails if we have to compose sub registers.
966 // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
967 if (Def->getOperand(2).getSubReg())
968 return false;
969
970 SrcIdx = 2;
971 SrcSubReg = Def->getOperand(3).getImm();
972 return true;
973}
974
975bool ValueTracker::getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg) {
976 assert(Def && "This method needs a valid definition");
977
978 assert(
979 (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
980 Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
981 if (Def->isCopy())
982 return getNextSourceFromCopy(SrcIdx, SrcSubReg);
983 if (Def->isBitcast())
984 return getNextSourceFromBitcast(SrcIdx, SrcSubReg);
985 // All the remaining cases involve "complex" instructions.
986 // Bails if we did not ask for the advanced tracking.
987 if (!UseAdvancedTracking)
988 return false;
989 if (Def->isRegSequence())
990 return getNextSourceFromRegSequence(SrcIdx, SrcSubReg);
991 if (Def->isInsertSubreg())
992 return getNextSourceFromInsertSubreg(SrcIdx, SrcSubReg);
993 if (Def->isExtractSubreg())
994 return getNextSourceFromExtractSubreg(SrcIdx, SrcSubReg);
995 if (Def->isSubregToReg())
996 return getNextSourceFromSubregToReg(SrcIdx, SrcSubReg);
997 return false;
998}
999
/// getNextSource - Move one step up the use-def chain. Returns the
/// definition we just walked through (or nullptr when the chain is
/// exhausted) and updates the tracker's internal state (Reg, Def, DefIdx,
/// DefSubReg) so the next call continues from the new source.
const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx,
                                                unsigned &SrcSubReg) {
  // If we reach a point where we cannot move up in the use-def chain,
  // there is nothing we can get.
  if (!Def)
    return nullptr;

  const MachineInstr *PrevDef = nullptr;
  // Try to find the next source.
  if (getNextSourceImpl(SrcIdx, SrcSubReg)) {
    // Update definition, definition index, and subregister for the
    // next call of getNextSource.
    const MachineOperand &MO = Def->getOperand(SrcIdx);
    assert(MO.isReg() && !MO.isDef() && "Source is invalid");
    // Update the current register.
    Reg = MO.getReg();
    // Update the return value before moving up in the use-def chain.
    PrevDef = Def;
    // If we can still move up in the use-def chain, move to the next
    // definition. Physical registers end the walk: they have no single
    // SSA definition to follow.
    if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
      Def = MRI->getVRegDef(Reg);
      DefIdx = MRI->def_begin(Reg).getOperandNo();
      DefSubReg = SrcSubReg;
      return PrevDef;
    }
  }
  // If we end up here, this means we will not be able to find another source
  // for the next iteration.
  // Make sure any new call to getNextSource bails out early by cutting the
  // use-def chain.
  Def = nullptr;
  return PrevDef;
}