//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
//
// This pass performs peephole optimizations to clean up ugly code
// sequences at the MachineInstruction layer. It runs at the end of
// the SSA phases, following VSX swap removal. A pass of dead code
// elimination follows this one for quick clean-up of any dead
// instructions introduced here. Although we could do this as callbacks
// from the generic peephole pass, this would have a couple of bad
// effects: it might remove optimization opportunities for VSX swap
// removal, and it would miss cleanups made possible following VSX
// swap removal.
//
//===---------------------------------------------------------------------===//

#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "MCTargetDesc/PPCPredicates.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-mi-peepholes"

STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
STATISTIC(MultiTOCSaves,
          "Number of functions with multiple TOC saves that must be kept");
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instructions fed by LI");

static cl::opt<bool>
    EnableSExtElimination("ppc-eliminate-signext",
                          cl::desc("enable elimination of sign-extensions"),
                          cl::init(false), cl::Hidden);

static cl::opt<bool>
    EnableZExtElimination("ppc-eliminate-zeroext",
                          cl::desc("enable elimination of zero-extensions"),
                          cl::init(false), cl::Hidden);

namespace llvm {
  void initializePPCMIPeepholePass(PassRegistry&);
}

namespace {

struct PPCMIPeephole : public MachineFunctionPass {

  static char ID;
  const PPCInstrInfo *TII;
  MachineFunction *MF;
  MachineRegisterInfo *MRI;

  PPCMIPeephole() : MachineFunctionPass(ID) {
    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  MachineDominatorTree *MDT;

  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  // Perform peepholes.
  bool simplifyCode(void);

  // Perform peepholes on compare and TOC-save instructions.
  bool eliminateRedundantCompare(void);
  bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
  void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
                      MachineInstr *MI);
  // Find the "true" register represented by SrcReg (following chains
  // of copies and subreg_to_reg operations).
  unsigned lookThruCopyLike(unsigned SrcReg);

public:

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(*MF.getFunction()))
      return false;
    initialize(MF);
    return simplifyCode();
  }
};

// Initialize class variables.
void PPCMIPeephole::initialize(MachineFunction &MFParm) {
  MF = &MFParm;
  MRI = &MF->getRegInfo();
  MDT = &getAnalysis<MachineDominatorTree>();
  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
  DEBUG(MF->dump());
}

static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
                                      MachineRegisterInfo *MRI) {
  assert(Op && "Invalid Operand!");
  if (!Op->isReg())
    return nullptr;

  unsigned Reg = Op->getReg();
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
    return nullptr;

  return MRI->getVRegDef(Reg);
}

// This function returns the number of known zero bits in the output of MI,
// counting from the most significant bit.
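// For example (illustrative, not from a test case): for
//   %vreg1<def> = RLDICL %vreg0, 0, 48
// operand 3 (the mask begin, 48 here) guarantees that the 48 high-order
// bits of %vreg1 are zero, so this function returns 48.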
static unsigned
getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
  unsigned Opcode = MI->getOpcode();
  if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
      Opcode == PPC::RLDCL  || Opcode == PPC::RLDCLo)
    return MI->getOperand(3).getImm();

  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
      MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
    return MI->getOperand(3).getImm();

  if ((Opcode == PPC::RLWINM  || Opcode == PPC::RLWINMo ||
       Opcode == PPC::RLWNM   || Opcode == PPC::RLWNMo ||
       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
      MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
    return 32 + MI->getOperand(3).getImm();

  if (Opcode == PPC::ANDIo) {
    uint16_t Imm = MI->getOperand(2).getImm();
    return 48 + countLeadingZeros(Imm);
  }

  if (Opcode == PPC::CNTLZW  || Opcode == PPC::CNTLZWo ||
      Opcode == PPC::CNTTZW  || Opcode == PPC::CNTTZWo ||
      Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
    // The result ranges from 0 to 32.
    return 58;

  if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo ||
      Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo)
    // The result ranges from 0 to 64.
    return 57;

  if (Opcode == PPC::LHZ   || Opcode == PPC::LHZX  ||
      Opcode == PPC::LHZ8  || Opcode == PPC::LHZX8 ||
      Opcode == PPC::LHZU  || Opcode == PPC::LHZUX ||
      Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
    return 48;

  if (Opcode == PPC::LBZ   || Opcode == PPC::LBZX  ||
      Opcode == PPC::LBZ8  || Opcode == PPC::LBZX8 ||
      Opcode == PPC::LBZU  || Opcode == PPC::LBZUX ||
      Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
    return 56;

  if (TII->isZeroExtended(*MI))
    return 32;

  return 0;
}

// This function maintains a map for the pairs <TOC Save Instr, Keep>.
// Each time a new TOC save is encountered, it checks if any of the
// existing ones are dominated by the new one. If so, it marks the existing
// one as redundant by setting its entry in the map to false. It then adds
// the new instruction to the map with either true or false depending on
// whether any existing instruction dominated the new one.
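// For example (hypothetical CFG): if a TOC save in BB#0 dominates one
// already in the map, the mapped entry's flag is set to false; if instead
// the new save is dominated by a mapped one, it is inserted with Keep set
// to false. Entries whose flag is false are erased later by
// eliminateRedundantTOCSaves.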
void PPCMIPeephole::UpdateTOCSaves(
    std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
  assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
  bool Keep = true;
  for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++) {
    MachineInstr *CurrInst = It->first;
    // If the new instruction dominates an existing one, mark the existing
    // one as redundant.
    if (It->second && MDT->dominates(MI, CurrInst))
      It->second = false;
    // Check if the new instruction is redundant.
    if (MDT->dominates(CurrInst, MI)) {
      Keep = false;
      break;
    }
  }
  // Add new instruction to map.
  TOCSaves[MI] = Keep;
}

// Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode(void) {
  bool Simplified = false;
  MachineInstr* ToErase = nullptr;
  std::map<MachineInstr *, bool> TOCSaves;

  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {

      // If the previous instruction was marked for elimination,
      // remove it now.
      if (ToErase) {
        ToErase->eraseFromParent();
        ToErase = nullptr;
      }

      // Ignore debug instructions.
      if (MI.isDebugValue())
        continue;

      // Per-opcode peepholes.
      switch (MI.getOpcode()) {

      default:
        break;

      case PPC::STD: {
        MachineFrameInfo &MFI = MF->getFrameInfo();
        if (MFI.hasVarSizedObjects() ||
            !MF->getSubtarget<PPCSubtarget>().isELFv2ABI())
          break;
        // When encountering a TOC save instruction, call UpdateTOCSaves
        // to add it to the TOCSaves map and mark any existing TOC saves
        // it dominates as redundant.
        if (TII->isTOCSaveMI(MI))
          UpdateTOCSaves(TOCSaves, &MI);
        break;
      }
      case PPC::XXPERMDI: {
        // Perform simplifications of 2x64 vector swaps and splats.
        // A swap is identified by an immediate value of 2, and a splat
        // is identified by an immediate value of 0 or 3.
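        // For example (illustrative):
        //   XXPERMDI %vA, %vB, %vB, 2   ; swaps the two doublewords of %vB
        //   XXPERMDI %vA, %vB, %vB, 0   ; splats doubleword 0 of %vB
        //   XXPERMDI %vA, %vB, %vB, 3   ; splats doubleword 1 of %vB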
        int Immed = MI.getOperand(3).getImm();

        if (Immed != 1) {

          // For each of these simplifications, we need the two source
          // regs to match. Unfortunately, MachineCSE ignores COPY and
          // SUBREG_TO_REG, so for example we can see
          //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
          // We have to look through chains of COPY and SUBREG_TO_REG
          // to find the real source values for comparison.
          unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
          unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());

          if (TrueReg1 == TrueReg2
              && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
            MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
            unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;

            // If this is a splat fed by a splatting load, the splat is
            // redundant. Replace with a copy. This doesn't happen directly due
            // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
            // a load of a double to a vector of 64-bit integers.
            auto isConversionOfLoadAndSplat = [=]() -> bool {
              if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
                return false;
              unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
              if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
                MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
                if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
                  return true;
              }
              return false;
            };
            if (DefMI && (Immed == 0 || Immed == 3)) {
              if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
                DEBUG(dbgs()
                      << "Optimizing load-and-splat/splat "
                         "to load-and-splat/copy: ");
                DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(MI.getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }
            }

            // If this is a splat or a swap fed by another splat, we
            // can replace it with a copy.
            if (DefOpc == PPC::XXPERMDI) {
              unsigned FeedImmed = DefMI->getOperand(3).getImm();
              unsigned FeedReg1
                = lookThruCopyLike(DefMI->getOperand(1).getReg());
              unsigned FeedReg2
                = lookThruCopyLike(DefMI->getOperand(2).getReg());

              if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
                DEBUG(dbgs()
                      << "Optimizing splat/swap or splat/splat "
                         "to splat/copy: ");
                DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(MI.getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }

              // If this is a splat fed by a swap, we can modify the
              // splat to splat the other value from the swap's input
              // parameter.
              else if ((Immed == 0 || Immed == 3)
                       && FeedImmed == 2 && FeedReg1 == FeedReg2) {
                DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
                DEBUG(MI.dump());
                MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
                MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
                MI.getOperand(3).setImm(3 - Immed);
                Simplified = true;
              }

              // If this is a swap fed by a swap, we can replace it
              // with a copy from the first swap's input.
              else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
                DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
                DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(DefMI->getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }
            } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
                       (DefMI->getOperand(2).getImm() == 0 ||
                        DefMI->getOperand(2).getImm() == 3)) {
              // Splat fed by another splat - switch the output of the first
              // and remove the second.
              DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
              ToErase = &MI;
              Simplified = true;
              DEBUG(dbgs() << "Removing redundant splat: ");
              DEBUG(MI.dump());
            }
          }
        }
        break;
      }
      case PPC::VSPLTB:
      case PPC::VSPLTH:
      case PPC::XXSPLTW: {
        unsigned MyOpcode = MI.getOpcode();
        unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
        unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
        if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
          break;
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
        if (!DefMI)
          break;
        unsigned DefOpcode = DefMI->getOpcode();
        auto isConvertOfSplat = [=]() -> bool {
          if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
            return false;
          unsigned ConvReg = DefMI->getOperand(1).getReg();
          if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
            return false;
          MachineInstr *Splt = MRI->getVRegDef(ConvReg);
          return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
                          Splt->getOpcode() == PPC::XXSPLTW);
        };
        bool AlreadySplat = (MyOpcode == DefOpcode) ||
          (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
          (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS) ||
          (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
        // If the instruction[s] that feed this splat have already splat
        // the value, this splat is redundant.
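        // For example (illustrative): XXSPLTW %vA, %vB, 0 where %vB is
        // defined by LXVWSX (a load-and-splat) already has the desired
        // value in every element, so the XXSPLTW reduces to a COPY.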
        if (AlreadySplat) {
          DEBUG(dbgs() << "Changing redundant splat to a copy: ");
          DEBUG(MI.dump());
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                  MI.getOperand(0).getReg())
              .add(MI.getOperand(OpNo));
          ToErase = &MI;
          Simplified = true;
        }
        // Splat fed by a shift. This usually arises when we want to align
        // a value to splat into vector element zero.
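        // For example (illustrative): XXSPLTW %vA, (XXSLDWI %vB, %vB, 1), 0
        // splats element (0 + 1) & 0x3 = 1 of %vB, so it can be rewritten
        // as XXSPLTW %vA, %vB, 1 and the shift removed if it has no other
        // uses.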
        if (DefOpcode == PPC::XXSLDWI) {
          unsigned ShiftRes = DefMI->getOperand(0).getReg();
          unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
          unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
          unsigned ShiftImm = DefMI->getOperand(3).getImm();
          unsigned SplatImm = MI.getOperand(2).getImm();
          if (ShiftOp1 == ShiftOp2) {
            unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
            if (MRI->hasOneNonDBGUse(ShiftRes)) {
              DEBUG(dbgs() << "Removing redundant shift: ");
              DEBUG(DefMI->dump());
              ToErase = DefMI;
            }
            Simplified = true;
            DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
                  " to " << NewElem << " in instruction: ");
            DEBUG(MI.dump());
            MI.getOperand(1).setReg(ShiftOp1);
            MI.getOperand(2).setImm(NewElem);
          }
        }
        break;
      }
      case PPC::XVCVDPSP: {
        // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
        unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
        if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
          break;
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);

        // This can occur when building a vector of single precision or integer
        // values.
        if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
          unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
          unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
          if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
              !TargetRegisterInfo::isVirtualRegister(DefsReg2))
            break;
          MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
          MachineInstr *P2 = MRI->getVRegDef(DefsReg2);

          if (!P1 || !P2)
            break;

          // Remove the passed FRSP instruction if it only feeds this MI and
          // set any uses of that FRSP (in this MI) to the source of the FRSP.
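          // For example (illustrative): if %vreg1 = FRSP %vreg0 feeds only
          // the XXPERMDI feeding this XVCVDPSP, the rounding to single
          // precision is already performed by XVCVDPSP, so the FRSP is
          // erased and its use is rewritten to read %vreg0 directly.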
          auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
            if (RoundInstr->getOpcode() == PPC::FRSP &&
                MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
              Simplified = true;
              unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
              unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
              MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
              for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
                if (Use.getOperand(i).isReg() &&
                    Use.getOperand(i).getReg() == FRSPDefines)
                  Use.getOperand(i).setReg(ConvReg1);
              DEBUG(dbgs() << "Removing redundant FRSP:\n");
              DEBUG(RoundInstr->dump());
              DEBUG(dbgs() << "As it feeds instruction:\n");
              DEBUG(MI.dump());
              DEBUG(dbgs() << "Through instruction:\n");
              DEBUG(DefMI->dump());
              RoundInstr->eraseFromParent();
            }
          };

          // If the input to XVCVDPSP is a vector that was built (even
          // partially) out of FRSP's, the FRSP(s) can safely be removed
          // since this instruction performs the same operation.
          if (P1 != P2) {
            removeFRSPIfPossible(P1);
            removeFRSPIfPossible(P2);
            break;
          }
          removeFRSPIfPossible(P1);
        }
        break;
      }
      case PPC::EXTSH:
      case PPC::EXTSH8:
      case PPC::EXTSH8_32_64: {
        if (!EnableSExtElimination) break;
        unsigned NarrowReg = MI.getOperand(1).getReg();
        if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
        // If we've used a zero-extending load that we will sign-extend,
        // just do a sign-extending load.
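        // For example (illustrative):
        //   %vreg1 = LHZ <mem>      ; zero-extending halfword load
        //   %vreg2 = EXTSH %vreg1   ; sign-extend from halfword
        // is merged into a single sign-extending load:
        //   %vreg2 = LHA <mem>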
        if (SrcMI->getOpcode() == PPC::LHZ ||
            SrcMI->getOpcode() == PPC::LHZX) {
          if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
            break;
          auto is64Bit = [] (unsigned Opcode) {
            return Opcode == PPC::EXTSH8;
          };
          auto isXForm = [] (unsigned Opcode) {
            return Opcode == PPC::LHZX;
          };
          auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
            if (is64Bit)
              if (isXForm) return PPC::LHAX8;
              else         return PPC::LHA8;
            else
              if (isXForm) return PPC::LHAX;
              else         return PPC::LHA;
          };
          unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
                                       isXForm(SrcMI->getOpcode()));
          DEBUG(dbgs() << "Zero-extending load\n");
          DEBUG(SrcMI->dump());
          DEBUG(dbgs() << "and sign-extension\n");
          DEBUG(MI.dump());
          DEBUG(dbgs() << "are merged into sign-extending load\n");
          SrcMI->setDesc(TII->get(Opc));
          SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        }
        break;
      }
      case PPC::EXTSW:
      case PPC::EXTSW_32:
      case PPC::EXTSW_32_64: {
        if (!EnableSExtElimination) break;
        unsigned NarrowReg = MI.getOperand(1).getReg();
        if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
        // If we've used a zero-extending load that we will sign-extend,
        // just do a sign-extending load.
        if (SrcMI->getOpcode() == PPC::LWZ ||
            SrcMI->getOpcode() == PPC::LWZX) {
          if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
            break;
          auto is64Bit = [] (unsigned Opcode) {
            return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64;
          };
          auto isXForm = [] (unsigned Opcode) {
            return Opcode == PPC::LWZX;
          };
          auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
            if (is64Bit)
              if (isXForm) return PPC::LWAX;
              else         return PPC::LWA;
            else
              if (isXForm) return PPC::LWAX_32;
              else         return PPC::LWA_32;
          };
          unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
                                       isXForm(SrcMI->getOpcode()));
          DEBUG(dbgs() << "Zero-extending load\n");
          DEBUG(SrcMI->dump());
          DEBUG(dbgs() << "and sign-extension\n");
          DEBUG(MI.dump());
          DEBUG(dbgs() << "are merged into sign-extending load\n");
          SrcMI->setDesc(TII->get(Opc));
          SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
                   TII->isSignExtended(*SrcMI)) {
          // We can eliminate EXTSW if the input is known to be already
          // sign-extended.
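          // For example (illustrative): if %vreg5 is defined by a
          // sign-extending load such as LHA, the EXTSW_32_64 is redundant
          // and reduces to placing %vreg5 into the low 32 bits of a
          // 64-bit register:
          //   %vreg8<def> = IMPLICIT_DEF
          //   %vreg7<def> = INSERT_SUBREG %vreg8, %vreg5, sub_32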
          DEBUG(dbgs() << "Removing redundant sign-extension\n");
          unsigned TmpReg =
            MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
                  TmpReg);
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
                  MI.getOperand(0).getReg())
              .addReg(TmpReg)
              .addReg(NarrowReg)
              .addImm(PPC::sub_32);
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        }
        break;
      }
      case PPC::RLDICL: {
        // We can eliminate RLDICL (e.g. for zero-extension)
        // if all bits to clear are already zero in the input.
        // This code assumes the following code sequence for zero-extension.
        //   %vreg6<def> = COPY %vreg5:sub_32; (optional)
        //   %vreg8<def> = IMPLICIT_DEF;
        //   %vreg7<def,tied1> = INSERT_SUBREG %vreg8<tied0>, %vreg6, sub_32;
        if (!EnableZExtElimination) break;

        if (MI.getOperand(2).getImm() != 0)
          break;

        unsigned SrcReg = MI.getOperand(1).getReg();
        if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
        if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
              SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
          break;

        MachineInstr *ImpDefMI, *SubRegMI;
        ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
        SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
        if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;

        SrcMI = SubRegMI;
        if (SubRegMI->getOpcode() == PPC::COPY) {
          unsigned CopyReg = SubRegMI->getOperand(1).getReg();
          if (TargetRegisterInfo::isVirtualRegister(CopyReg))
            SrcMI = MRI->getVRegDef(CopyReg);
        }

        unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII);
        if (MI.getOperand(3).getImm() <= KnownZeroCount) {
          DEBUG(dbgs() << "Removing redundant zero-extension\n");
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                  MI.getOperand(0).getReg())
              .addReg(SrcReg);
          ToErase = &MI;
          Simplified = true;
          NumEliminatedZExt++;
        }
        break;
      }

      // TODO: Any instruction that has an immediate form fed only by a PHI
      // whose operands are all load immediate can be folded away. We currently
      // do this for ADD instructions, but should expand it to arithmetic and
      // binary instructions with immediate forms in the future.
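      // For example (illustrative), if %vreg2 = LI 10 and %vreg3 = LI 33
      // feed %vreg4 = PHI %vreg2, <BB#1>, %vreg3, <BB#2>, then
      //   %vreg5 = ADD4 %vreg4, %vreg1
      // is rewritten so the LIs become ADDIs off %vreg1 (ADDI %vreg1, 10
      // and ADDI %vreg1, 33) and the ADD4 becomes a COPY of the PHI.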
      case PPC::ADD4:
      case PPC::ADD8: {
        auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
          assert(PhiOp && "Invalid Operand!");
          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);

          return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
                 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
        };

        auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
                                            MachineOperand *PhiOp) {
          assert(PhiOp && "Invalid Operand!");
          assert(DominatorOp && "Invalid Operand!");
          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
          MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);

          // Note: the virtual registers appear only at the odd operand
          // indices of the PHI node; the even indices hold the
          // predecessor-BB info.
          for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
            MachineInstr *LiMI =
              getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
            if (!LiMI ||
                (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
                || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
                !MDT->dominates(DefDomMI, LiMI))
              return false;
          }

          return true;
        };

        MachineOperand Op1 = MI.getOperand(1);
        MachineOperand Op2 = MI.getOperand(2);
        if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
          std::swap(Op1, Op2);
        else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
          break; // We don't have an ADD fed by LIs that can be transformed.

        // Now we know that Op1 is the PHI node and Op2 is the dominator.
        unsigned DominatorReg = Op2.getReg();

        const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
                                             ? &PPC::G8RC_and_G8RC_NOX0RegClass
                                             : &PPC::GPRC_and_GPRC_NOR0RegClass;
        MRI->setRegClass(DominatorReg, TRC);

        // Replace the LIs with ADDIs.
        MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
        for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
          MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
          DEBUG(dbgs() << "Optimizing LI to ADDI: ");
          DEBUG(LiMI->dump());

          // There could be repeated registers in the PHI, e.g.: %vreg1<def> =
          // PHI %vreg6, <BB#2>, %vreg8, <BB#3>, %vreg8, <BB#6>; so if we've
          // already replaced the def instruction, skip.
          if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
            continue;

          assert((LiMI->getOpcode() == PPC::LI ||
                  LiMI->getOpcode() == PPC::LI8) &&
                 "Invalid Opcode!");
          auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
          LiMI->RemoveOperand(1);                    // remove the imm of LI
          LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
                                                              : PPC::ADDI8));
          MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
              .addReg(DominatorReg)
              .addImm(LiImm); // restore the imm of LI
          DEBUG(LiMI->dump());
        }

        // Replace the ADD with a COPY.
        DEBUG(dbgs() << "Optimizing ADD to COPY: ");
        DEBUG(MI.dump());
        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                MI.getOperand(0).getReg())
            .add(Op1);
        ToErase = &MI;
        Simplified = true;
        NumOptADDLIs++;
        break;
      }
      }
    }

    // If the last instruction was marked for elimination,
    // remove it now.
    if (ToErase) {
      ToErase->eraseFromParent();
      ToErase = nullptr;
    }
  }

  // Eliminate all the TOC save instructions which are redundant.
  Simplified |= eliminateRedundantTOCSaves(TOCSaves);
  // Try to eliminate redundant compare instructions.
  Simplified |= eliminateRedundantCompare();

  return Simplified;
}

// Helper functions for eliminateRedundantCompare.
static bool isEqOrNe(MachineInstr *BI) {
  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
}

static bool isSupportedCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPLD  || opCode == PPC::CMPD  ||
          opCode == PPC::CMPLW  || opCode == PPC::CMPW  ||
          opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
          opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
}

static bool is64bitCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
          opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
}

static bool isSignedCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
          opCode == PPC::CMPDI || opCode == PPC::CMPWI);
}

static unsigned getSignedCmpOpCode(unsigned opCode) {
  if (opCode == PPC::CMPLD)  return PPC::CMPD;
  if (opCode == PPC::CMPLW)  return PPC::CMPW;
  if (opCode == PPC::CMPLDI) return PPC::CMPDI;
  if (opCode == PPC::CMPLWI) return PPC::CMPWI;
  return opCode;
}

// We can decrement the immediate x in (GE x) by changing it to (GT x-1)
// or (LT x) to (LE x-1).
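// For example, for a signed compare (GE 5) is equivalent to (GT 4); the
// immediate checks below reject the boundary values that would wrap.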
static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
  uint64_t Imm = CMPI->getOperand(2).getImm();
  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
  if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
    return 0;

  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  unsigned PredHint = PPC::getPredicateHint(Pred);
  if (PredCond == PPC::PRED_GE)
    return PPC::getPredicate(PPC::PRED_GT, PredHint);
  if (PredCond == PPC::PRED_LT)
    return PPC::getPredicate(PPC::PRED_LE, PredHint);

  return 0;
}

// We can increment the immediate x in (GT x) by changing it to (GE x+1)
// or (LE x) to (LT x+1).
static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
  uint64_t Imm = CMPI->getOperand(2).getImm();
  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
  if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
    return 0;

  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  unsigned PredHint = PPC::getPredicateHint(Pred);
  if (PredCond == PPC::PRED_GT)
    return PPC::getPredicate(PPC::PRED_GE, PredHint);
  if (PredCond == PPC::PRED_LE)
    return PPC::getPredicate(PPC::PRED_LT, PredHint);

  return 0;
}

// This function takes a PHI node and returns the register value for the
// specified BB.
static unsigned getIncomingRegForBlock(MachineInstr *Phi,
                                       MachineBasicBlock *MBB) {
  for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
    MachineOperand &MO = Phi->getOperand(I);
    if (MO.getMBB() == MBB)
      return Phi->getOperand(I-1).getReg();
  }
  llvm_unreachable("invalid src basic block for this Phi node\n");
  return 0;
}

// This function tracks the source of the register through register copies.
// If BB1 and BB2 are non-NULL, we also track the PHI instruction in BB2,
// assuming that the control flows from BB1 into BB2.
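// For example (illustrative): given %vreg7 = PHI %vreg5, <BB#1>, %vreg6,
// <BB#2> in BB2, getSrcVReg(%vreg7, BB1, BB2, MRI) follows the PHI edge
// from BB1 and any full copies, returning the ultimate virtual source of
// %vreg5.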
static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
                           MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
  unsigned SrcReg = Reg;
  while (1) {
    unsigned NextReg = SrcReg;
    MachineInstr *Inst = MRI->getVRegDef(SrcReg);
    if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
      NextReg = getIncomingRegForBlock(Inst, BB1);
      // We track through the PHI only once to avoid an infinite loop.
      BB1 = nullptr;
    }
    else if (Inst->isFullCopy())
      NextReg = Inst->getOperand(1).getReg();
    if (NextReg == SrcReg || !TargetRegisterInfo::isVirtualRegister(NextReg))
      break;
    SrcReg = NextReg;
  }
  return SrcReg;
}

static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
                                          MachineBasicBlock *&PredMBB,
                                          MachineBasicBlock *&MBBtoMoveCmp,
                                          MachineRegisterInfo *MRI) {

  auto isEligibleBB = [&](MachineBasicBlock &BB) {
    auto BII = BB.getFirstInstrTerminator();
    // We optimize BBs ending with a conditional branch.
    // We check only for BCC here, not BCCLR, because BCCLR
    // will be formed only later in the pipeline.
    if (BB.succ_size() == 2 &&
        BII != BB.instr_end() &&
        (*BII).getOpcode() == PPC::BCC &&
        (*BII).getOperand(1).isReg()) {
      // We optimize only if the condition code is used only by one BCC.
      unsigned CndReg = (*BII).getOperand(1).getReg();
      if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
          !MRI->hasOneNonDBGUse(CndReg))
        return false;

      MachineInstr *CMPI = MRI->getVRegDef(CndReg);
      // We assume compare and branch are in the same BB for ease of analysis.
      if (CMPI->getParent() != &BB)
        return false;

      // We skip this BB if a physical register is used in the comparison.
      for (MachineOperand &MO : CMPI->operands())
        if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
          return false;

      return true;
    }
    return false;
  };

  // If a predecessor BB has more than one successor, moving the compare
  // there would require creating a new BB to hold it. So far, we do not
  // move a compare instruction into a BB having multiple successors, to
  // avoid potentially increasing code size.
  auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
    return BB.succ_size() == 1;
  };

  if (!isEligibleBB(MBB))
    return false;

  unsigned NumPredBBs = MBB.pred_size();
  if (NumPredBBs == 1) {
    MachineBasicBlock *TmpMBB = *MBB.pred_begin();
    if (isEligibleBB(*TmpMBB)) {
      PredMBB = TmpMBB;
      MBBtoMoveCmp = nullptr;
      return true;
    }
  }
  else if (NumPredBBs == 2) {
    // We check for the partially redundant case.
    // So far, we support cases with only two predecessors
    // to avoid increasing the number of instructions.
    MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
    MachineBasicBlock *Pred1MBB = *PI;
    MachineBasicBlock *Pred2MBB = *(PI+1);

    if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
      // We assume Pred1MBB is the BB containing the compare to be merged and
      // Pred2MBB is the BB to which we will append a compare instruction.
      // Hence we can proceed as is.
    }
    else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
      // We need to swap Pred1MBB and Pred2MBB to canonicalize.
      std::swap(Pred1MBB, Pred2MBB);
    }
    else return false;

    // Here, Pred2MBB is the BB to which we need to append a compare inst.
    // We cannot move the compare instruction if operands are not available
    // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
    MachineInstr *BI = &*MBB.getFirstInstrTerminator();
    MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
    for (int I = 1; I <= 2; I++)
      if (CMPI->getOperand(I).isReg()) {
        MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
        if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
          return false;
      }

    PredMBB = Pred1MBB;
    MBBtoMoveCmp = Pred2MBB;
    return true;
  }

  return false;
}

// This function iterates over the input map containing pairs of a TOC save
// instruction and a flag. The flag is set to false if the TOC save is
// proven redundant. This function erases from the basic block all the TOC
// saves marked as redundant.
bool PPCMIPeephole::eliminateRedundantTOCSaves(
    std::map<MachineInstr *, bool> &TOCSaves) {
  bool Simplified = false;
  int NumKept = 0;
  for (auto TOCSave : TOCSaves) {
    if (!TOCSave.second) {
      TOCSave.first->eraseFromParent();
      RemoveTOCSave++;
      Simplified = true;
    } else {
      NumKept++;
    }
  }

  if (NumKept > 1)
    MultiTOCSaves++;

  return Simplified;
}

// If multiple conditional branches are executed based on the (essentially)
// same comparison, we merge the compare instructions into one and make
// multiple conditional branches on this comparison.
// For example,
//   if (a == 0) { ... }
//   else if (a < 0) { ... }
// can be executed by one compare and two conditional branches instead of
// two pairs of a compare and a conditional branch.
//
// This method merges two compare instructions in two MBBs and modifies the
// compare and conditional branch instructions if needed.
// For the above example, the input for this pass looks like:
//   cmplwi r3, 0
//   beq    0, .LBB0_3
//   cmpwi  r3, -1
//   bgt    0, .LBB0_4
// So, before merging two compares, we need to modify these instructions as
//   cmpwi  r3, 0   ; cmplwi and cmpwi yield same result for beq
//   beq    0, .LBB0_3
//   cmpwi  r3, 0   ; greater than -1 means greater than or equal to 0
//   bge    0, .LBB0_4

bool PPCMIPeephole::eliminateRedundantCompare(void) {
  bool Simplified = false;

  for (MachineBasicBlock &MBB2 : *MF) {
    MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;

    // For the fully redundant case, we select two basic blocks MBB1 and
    // MBB2 as an optimization target if
    // - both MBBs end with a conditional branch,
    // - MBB1 is the only predecessor of MBB2, and
    // - the compare does not take a physical register as an operand in
    //   either MBB.
    // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
    //
    // As a partially redundant case, we additionally handle the case where
    // MBB2 has one additional predecessor, which has only one successor
    // (MBB2). In this case, we move the compare instruction originally in
    // MBB2 into MBBtoMoveCmp. This partially redundant case typically
    // appears when compiling a while loop; here, MBBtoMoveCmp is the loop
    // preheader.
    //
    // Overview of CFG of related basic blocks
    // Fully redundant case          Partially redundant case
    //   --------                      ----------------  --------
    //   | MBB1 | (w/ 2 succ)          | MBBtoMoveCmp |  | MBB1 | (w/ 2 succ)
    //   --------                      ----------------  --------
    //      |    \                      (w/ 1 succ) \       |    \
    //      |     \                                  \      |     \
    //      |                                         \     |
    //   --------                                     --------
    //   | MBB2 | (w/ 1 pred                          | MBB2 | (w/ 2 pred
    //   --------  and 2 succ)                        --------  and 2 succ)
    //      |    \                                       |    \
    //      |     \                                      |     \
    //
    if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
      continue;

    MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
    MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());

    MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
    MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
    bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);

    // We cannot optimize an unsupported compare opcode or
    // a mix of 32-bit and 64-bit comparisons.
    if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
        !isSupportedCmpOp(CMPI2->getOpcode()) ||
        is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
      continue;

    unsigned NewOpCode = 0;
    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
    bool SwapOperands = false;

    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
      // Typically, unsigned comparison is used for equality check, but
      // we replace it with a signed comparison if the comparison
      // to be merged is a signed comparison.
      // In other cases of opcode mismatch, we cannot optimize this.
      if (isEqOrNe(BI2) &&
          CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
        NewOpCode = CMPI1->getOpcode();
      else if (isEqOrNe(BI1) &&
               getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
        NewOpCode = CMPI2->getOpcode();
      else continue;
    }

    if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
      // In case of comparisons between two registers, these two registers
      // must be the same to merge two comparisons.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
                                         MBB1, &MBB2, MRI);

      if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
        // Same pair of registers in the same order; ready to merge as is.
      }
      else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
        // Same pair of registers in different order.
        // We reverse the predicate to merge compare instructions.
        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
        // In case of partial redundancy, we need to swap operands
        // in the other compare instruction.
        SwapOperands = true;
      }
      else continue;
    }
    else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
      // In case of comparisons between a register and an immediate,
      // the operand register must be the same for the two compare
      // instructions.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      if (Cmp1Operand1 != Cmp2Operand1)
        continue;

      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();

      // If the immediates are not the same, we try to adjust by changing the
      // predicate; e.g. GT imm means GE (imm+1).
      if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
        int Diff = Imm1 - Imm2;
        if (Diff < -2 || Diff > 2)
          continue;

        unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
        unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
        unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
        unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
        if (Diff == 2) {
          if (PredToInc2 && PredToDec1) {
            NewPredicate2 = PredToInc2;
            NewPredicate1 = PredToDec1;
            NewImm2++;
            NewImm1--;
          }
        }
        else if (Diff == 1) {
          if (PredToInc2) {
            NewImm2++;
            NewPredicate2 = PredToInc2;
          }
          else if (PredToDec1) {
            NewImm1--;
            NewPredicate1 = PredToDec1;
          }
        }
        else if (Diff == -1) {
          if (PredToDec2) {
            NewImm2--;
            NewPredicate2 = PredToDec2;
          }
          else if (PredToInc1) {
            NewImm1++;
            NewPredicate1 = PredToInc1;
          }
        }
        else if (Diff == -2) {
          if (PredToDec2 && PredToInc1) {
            NewPredicate2 = PredToDec2;
            NewPredicate1 = PredToInc1;
            NewImm2--;
            NewImm1++;
          }
        }
      }

      // We cannot merge two compares if the immediates are not the same.
      if (NewImm2 != NewImm1)
        continue;
    }

    DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
    DEBUG(CMPI1->dump());
    DEBUG(BI1->dump());
    DEBUG(CMPI2->dump());
    DEBUG(BI2->dump());

    // We adjust opcode, predicates and immediate as we determined above.
    if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
      CMPI1->setDesc(TII->get(NewOpCode));
    }
    if (NewPredicate1) {
      BI1->getOperand(0).setImm(NewPredicate1);
    }
    if (NewPredicate2) {
      BI2->getOperand(0).setImm(NewPredicate2);
    }
    if (NewImm1 != Imm1) {
      CMPI1->getOperand(2).setImm(NewImm1);
    }

    if (IsPartiallyRedundant) {
      // We touch up the compare instruction in MBB2 and move it to
      // a previous BB to handle the partially redundant case.
      if (SwapOperands) {
        unsigned Op1 = CMPI2->getOperand(1).getReg();
        unsigned Op2 = CMPI2->getOperand(2).getReg();
        CMPI2->getOperand(1).setReg(Op2);
        CMPI2->getOperand(2).setReg(Op1);
      }
      if (NewImm2 != Imm2)
        CMPI2->getOperand(2).setImm(NewImm2);

      for (int I = 1; I <= 2; I++) {
        if (CMPI2->getOperand(I).isReg()) {
          MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
          if (Inst->getParent() != &MBB2)
            continue;

          assert(Inst->getOpcode() == PPC::PHI &&
                 "We cannot support the case where an operand comes "
                 "from this BB.");
          unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
          CMPI2->getOperand(I).setReg(SrcReg);
        }
      }
      auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
      MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));

      DebugLoc DL = CMPI2->getDebugLoc();
      unsigned NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
      BuildMI(MBB2, MBB2.begin(), DL,
              TII->get(PPC::PHI), NewVReg)
          .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
          .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
      BI2->getOperand(1).setReg(NewVReg);
    }
    else {
      // We finally eliminate the compare instruction in MBB2.
      BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
      CMPI2->eraseFromParent();
    }
    BI2->getOperand(1).setIsKill(true);
    BI1->getOperand(1).setIsKill(false);

    DEBUG(dbgs() << "into a compare and two branches:\n");
    DEBUG(CMPI1->dump());
    DEBUG(BI1->dump());
    DEBUG(BI2->dump());
    if (IsPartiallyRedundant) {
      DEBUG(dbgs() << "The following compare is moved into BB#" <<
            MBBtoMoveCmp->getNumber() << " to handle partial redundancy.\n");
      DEBUG(CMPI2->dump());
    }

    Simplified = true;
  }

  return Simplified;
}

// This is used to find the "true" source register for an
// XXPERMDI instruction, since MachineCSE does not handle the
// "copy-like" operations (Copy and SubregToReg). Returns
// the original SrcReg unless it is the target of a copy-like
// operation, in which case we chain backwards through all
// such operations to the ultimate source register. If a
// physical register is encountered, we stop the search.
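// For example (illustrative): given
//   %vreg3 = SUBREG_TO_REG 0, %vreg2, sub_32
//   %vreg4 = COPY %vreg3
// lookThruCopyLike(%vreg4) returns %vreg2.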
unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {

  while (true) {

    MachineInstr *MI = MRI->getVRegDef(SrcReg);
    if (!MI->isCopyLike())
      return SrcReg;

    unsigned CopySrcReg;
    if (MI->isCopy())
      CopySrcReg = MI->getOperand(1).getReg();
    else {
      assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
      CopySrcReg = MI->getOperand(2).getReg();
    }

    if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
      return CopySrcReg;

    SrcReg = CopySrcReg;
  }
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                      "PowerPC MI Peephole Optimization", false, false)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                    "PowerPC MI Peephole Optimization", false, false)

char PPCMIPeephole::ID = 0;
FunctionPass*
llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }