blob: 7a5091b41e1bd4d05574351ff4446869af2f00be [file] [log] [blame]
//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
//
// This pass performs peephole optimizations to clean up ugly code
// sequences at the MachineInstruction layer.  It runs at the end of
// the SSA phases, following VSX swap removal.  A pass of dead code
// elimination follows this one for quick clean-up of any dead
// instructions introduced here.  Although we could do this as callbacks
// from the generic peephole pass, this would have a couple of bad
// effects:  it might remove optimization opportunities for VSX swap
// removal, and it would miss cleanups made possible following VSX
// swap removal.
//
//===---------------------------------------------------------------------===//
21
Bill Schmidt34af5e12015-11-10 21:38:26 +000022#include "PPC.h"
23#include "PPCInstrBuilder.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000024#include "PPCInstrInfo.h"
Bill Schmidt34af5e12015-11-10 21:38:26 +000025#include "PPCTargetMachine.h"
Tony Jiang2d9c5f3b2017-09-19 16:14:37 +000026#include "llvm/ADT/Statistic.h"
27#include "llvm/CodeGen/MachineDominators.h"
Bill Schmidt34af5e12015-11-10 21:38:26 +000028#include "llvm/CodeGen/MachineFunctionPass.h"
29#include "llvm/CodeGen/MachineInstrBuilder.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/Support/Debug.h"
Hiroshi Inoue614453b2017-09-05 04:15:17 +000032#include "MCTargetDesc/PPCPredicates.h"
Bill Schmidt34af5e12015-11-10 21:38:26 +000033
34using namespace llvm;
35
36#define DEBUG_TYPE "ppc-mi-peepholes"
37
Tony Jiang2d9c5f3b2017-09-19 16:14:37 +000038STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
39
Bill Schmidt34af5e12015-11-10 21:38:26 +000040namespace llvm {
41 void initializePPCMIPeepholePass(PassRegistry&);
42}
43
44namespace {
45
46struct PPCMIPeephole : public MachineFunctionPass {
47
48 static char ID;
49 const PPCInstrInfo *TII;
50 MachineFunction *MF;
51 MachineRegisterInfo *MRI;
52
53 PPCMIPeephole() : MachineFunctionPass(ID) {
54 initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
55 }
56
57private:
Tony Jiang2d9c5f3b2017-09-19 16:14:37 +000058 MachineDominatorTree *MDT;
59
Bill Schmidt34af5e12015-11-10 21:38:26 +000060 // Initialize class variables.
61 void initialize(MachineFunction &MFParm);
62
63 // Perform peepholes.
64 bool simplifyCode(void);
65
Hiroshi Inoue614453b2017-09-05 04:15:17 +000066 // Perform peepholes.
67 bool eliminateRedundantCompare(void);
68
Bill Schmidt34af5e12015-11-10 21:38:26 +000069 // Find the "true" register represented by SrcReg (following chains
70 // of copies and subreg_to_reg operations).
71 unsigned lookThruCopyLike(unsigned SrcReg);
72
73public:
Tony Jiang2d9c5f3b2017-09-19 16:14:37 +000074
75 void getAnalysisUsage(AnalysisUsage &AU) const override {
76 AU.addRequired<MachineDominatorTree>();
77 AU.addPreserved<MachineDominatorTree>();
78 MachineFunctionPass::getAnalysisUsage(AU);
79 }
80
Bill Schmidt34af5e12015-11-10 21:38:26 +000081 // Main entry point for this pass.
82 bool runOnMachineFunction(MachineFunction &MF) override {
Andrew Kaylor289bd5f2016-04-27 19:39:32 +000083 if (skipFunction(*MF.getFunction()))
84 return false;
Bill Schmidt34af5e12015-11-10 21:38:26 +000085 initialize(MF);
86 return simplifyCode();
87 }
88};
89
90// Initialize class variables.
91void PPCMIPeephole::initialize(MachineFunction &MFParm) {
92 MF = &MFParm;
93 MRI = &MF->getRegInfo();
Tony Jiang2d9c5f3b2017-09-19 16:14:37 +000094 MDT = &getAnalysis<MachineDominatorTree>();
Bill Schmidt34af5e12015-11-10 21:38:26 +000095 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
96 DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
97 DEBUG(MF->dump());
98}
99
Tony Jiang2d9c5f3b2017-09-19 16:14:37 +0000100static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
101 MachineRegisterInfo *MRI) {
102 assert(Op && "Invalid Operand!");
103 if (!Op->isReg())
104 return nullptr;
105
106 unsigned Reg = Op->getReg();
107 if (!TargetRegisterInfo::isVirtualRegister(Reg))
108 return nullptr;
109
110 return MRI->getVRegDef(Reg);
111}
112
Bill Schmidt34af5e12015-11-10 21:38:26 +0000113// Perform peephole optimizations.
114bool PPCMIPeephole::simplifyCode(void) {
115 bool Simplified = false;
116 MachineInstr* ToErase = nullptr;
117
118 for (MachineBasicBlock &MBB : *MF) {
119 for (MachineInstr &MI : MBB) {
120
121 // If the previous instruction was marked for elimination,
122 // remove it now.
123 if (ToErase) {
124 ToErase->eraseFromParent();
125 ToErase = nullptr;
126 }
127
128 // Ignore debug instructions.
129 if (MI.isDebugValue())
130 continue;
131
132 // Per-opcode peepholes.
133 switch (MI.getOpcode()) {
134
135 default:
136 break;
137
138 case PPC::XXPERMDI: {
139 // Perform simplifications of 2x64 vector swaps and splats.
140 // A swap is identified by an immediate value of 2, and a splat
141 // is identified by an immediate value of 0 or 3.
142 int Immed = MI.getOperand(3).getImm();
143
144 if (Immed != 1) {
145
146 // For each of these simplifications, we need the two source
147 // regs to match. Unfortunately, MachineCSE ignores COPY and
148 // SUBREG_TO_REG, so for example we can see
149 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
150 // We have to look through chains of COPY and SUBREG_TO_REG
151 // to find the real source values for comparison.
152 unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
153 unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
154
155 if (TrueReg1 == TrueReg2
156 && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
157 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000158 unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
159
160 // If this is a splat fed by a splatting load, the splat is
161 // redundant. Replace with a copy. This doesn't happen directly due
162 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
163 // a load of a double to a vector of 64-bit integers.
164 auto isConversionOfLoadAndSplat = [=]() -> bool {
165 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
166 return false;
167 unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
168 if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
169 MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
170 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
171 return true;
172 }
173 return false;
174 };
175 if (DefMI && (Immed == 0 || Immed == 3)) {
176 if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
177 DEBUG(dbgs()
178 << "Optimizing load-and-splat/splat "
179 "to load-and-splat/copy: ");
180 DEBUG(MI.dump());
Diana Picus116bbab2017-01-13 09:58:52 +0000181 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
182 MI.getOperand(0).getReg())
183 .add(MI.getOperand(1));
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000184 ToErase = &MI;
185 Simplified = true;
186 }
187 }
Bill Schmidt34af5e12015-11-10 21:38:26 +0000188
189 // If this is a splat or a swap fed by another splat, we
190 // can replace it with a copy.
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000191 if (DefOpc == PPC::XXPERMDI) {
Bill Schmidt34af5e12015-11-10 21:38:26 +0000192 unsigned FeedImmed = DefMI->getOperand(3).getImm();
193 unsigned FeedReg1
194 = lookThruCopyLike(DefMI->getOperand(1).getReg());
195 unsigned FeedReg2
196 = lookThruCopyLike(DefMI->getOperand(2).getReg());
197
198 if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
199 DEBUG(dbgs()
200 << "Optimizing splat/swap or splat/splat "
201 "to splat/copy: ");
202 DEBUG(MI.dump());
Diana Picus116bbab2017-01-13 09:58:52 +0000203 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
204 MI.getOperand(0).getReg())
205 .add(MI.getOperand(1));
Bill Schmidt34af5e12015-11-10 21:38:26 +0000206 ToErase = &MI;
207 Simplified = true;
208 }
209
210 // If this is a splat fed by a swap, we can simplify modify
211 // the splat to splat the other value from the swap's input
212 // parameter.
213 else if ((Immed == 0 || Immed == 3)
214 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
215 DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
216 DEBUG(MI.dump());
217 MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
218 MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
219 MI.getOperand(3).setImm(3 - Immed);
220 Simplified = true;
221 }
222
223 // If this is a swap fed by a swap, we can replace it
224 // with a copy from the first swap's input.
225 else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
226 DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
227 DEBUG(MI.dump());
Diana Picus116bbab2017-01-13 09:58:52 +0000228 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
229 MI.getOperand(0).getReg())
230 .add(DefMI->getOperand(1));
Bill Schmidt34af5e12015-11-10 21:38:26 +0000231 ToErase = &MI;
232 Simplified = true;
233 }
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000234 } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
235 (DefMI->getOperand(2).getImm() == 0 ||
236 DefMI->getOperand(2).getImm() == 3)) {
Nemanja Ivanovic11049f82016-10-04 06:59:23 +0000237 // Splat fed by another splat - switch the output of the first
238 // and remove the second.
239 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
240 ToErase = &MI;
241 Simplified = true;
242 DEBUG(dbgs() << "Removing redundant splat: ");
243 DEBUG(MI.dump());
Bill Schmidt34af5e12015-11-10 21:38:26 +0000244 }
245 }
246 }
247 break;
248 }
Nemanja Ivanovic11049f82016-10-04 06:59:23 +0000249 case PPC::VSPLTB:
250 case PPC::VSPLTH:
251 case PPC::XXSPLTW: {
252 unsigned MyOpcode = MI.getOpcode();
253 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
254 unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000255 if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
256 break;
Nemanja Ivanovic11049f82016-10-04 06:59:23 +0000257 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
258 if (!DefMI)
259 break;
260 unsigned DefOpcode = DefMI->getOpcode();
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000261 auto isConvertOfSplat = [=]() -> bool {
262 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
263 return false;
264 unsigned ConvReg = DefMI->getOperand(1).getReg();
265 if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
266 return false;
267 MachineInstr *Splt = MRI->getVRegDef(ConvReg);
268 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
269 Splt->getOpcode() == PPC::XXSPLTW);
270 };
271 bool AlreadySplat = (MyOpcode == DefOpcode) ||
Nemanja Ivanovic11049f82016-10-04 06:59:23 +0000272 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
273 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000274 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
275 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
276 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
277 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
278 // If the instruction[s] that feed this splat have already splat
279 // the value, this splat is redundant.
280 if (AlreadySplat) {
Tim Shen4ff62b12016-10-12 00:48:25 +0000281 DEBUG(dbgs() << "Changing redundant splat to a copy: ");
282 DEBUG(MI.dump());
283 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
284 MI.getOperand(0).getReg())
Diana Picus116bbab2017-01-13 09:58:52 +0000285 .add(MI.getOperand(OpNo));
Nemanja Ivanovic11049f82016-10-04 06:59:23 +0000286 ToErase = &MI;
287 Simplified = true;
Nemanja Ivanovic11049f82016-10-04 06:59:23 +0000288 }
289 // Splat fed by a shift. Usually when we align value to splat into
290 // vector element zero.
291 if (DefOpcode == PPC::XXSLDWI) {
292 unsigned ShiftRes = DefMI->getOperand(0).getReg();
293 unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
294 unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
295 unsigned ShiftImm = DefMI->getOperand(3).getImm();
296 unsigned SplatImm = MI.getOperand(2).getImm();
297 if (ShiftOp1 == ShiftOp2) {
298 unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
299 if (MRI->hasOneNonDBGUse(ShiftRes)) {
300 DEBUG(dbgs() << "Removing redundant shift: ");
301 DEBUG(DefMI->dump());
302 ToErase = DefMI;
303 }
304 Simplified = true;
305 DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
306 " to " << NewElem << " in instruction: ");
307 DEBUG(MI.dump());
308 MI.getOperand(1).setReg(ShiftOp1);
309 MI.getOperand(2).setImm(NewElem);
310 }
311 }
312 break;
313 }
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000314 case PPC::XVCVDPSP: {
315 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
316 unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
317 if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
318 break;
319 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
320
321 // This can occur when building a vector of single precision or integer
322 // values.
323 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
324 unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
325 unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
326 if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
327 !TargetRegisterInfo::isVirtualRegister(DefsReg2))
328 break;
329 MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
330 MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
331
332 if (!P1 || !P2)
333 break;
334
335 // Remove the passed FRSP instruction if it only feeds this MI and
336 // set any uses of that FRSP (in this MI) to the source of the FRSP.
337 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
338 if (RoundInstr->getOpcode() == PPC::FRSP &&
339 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
340 Simplified = true;
341 unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
342 unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
343 MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
344 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
345 if (Use.getOperand(i).isReg() &&
346 Use.getOperand(i).getReg() == FRSPDefines)
347 Use.getOperand(i).setReg(ConvReg1);
348 DEBUG(dbgs() << "Removing redundant FRSP:\n");
349 DEBUG(RoundInstr->dump());
350 DEBUG(dbgs() << "As it feeds instruction:\n");
351 DEBUG(MI.dump());
352 DEBUG(dbgs() << "Through instruction:\n");
353 DEBUG(DefMI->dump());
354 RoundInstr->eraseFromParent();
355 }
356 };
357
358 // If the input to XVCVDPSP is a vector that was built (even
359 // partially) out of FRSP's, the FRSP(s) can safely be removed
360 // since this instruction performs the same operation.
361 if (P1 != P2) {
362 removeFRSPIfPossible(P1);
363 removeFRSPIfPossible(P2);
364 break;
365 }
366 removeFRSPIfPossible(P1);
367 }
368 break;
369 }
Tony Jiang2d9c5f3b2017-09-19 16:14:37 +0000370
371 // TODO: Any instruction that has an immediate form fed only by a PHI
372 // whose operands are all load immediate can be folded away. We currently
373 // do this for ADD instructions, but should expand it to arithmetic and
374 // binary instructions with immediate forms in the future.
375 case PPC::ADD4:
376 case PPC::ADD8: {
377 auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
378 assert(PhiOp && "Invalid Operand!");
379 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
380
381 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
382 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
383 };
384
385 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
386 MachineOperand *PhiOp) {
387 assert(PhiOp && "Invalid Operand!");
388 assert(DominatorOp && "Invalid Operand!");
389 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
390 MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
391
392 // Note: the vregs only show up at odd indices position of PHI Node,
393 // the even indices position save the BB info.
394 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
395 MachineInstr *LiMI =
396 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
397 if (!LiMI || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
398 !MDT->dominates(DefDomMI, LiMI) ||
399 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8))
400 return false;
401 }
402
403 return true;
404 };
405
406 MachineOperand Op1 = MI.getOperand(1);
407 MachineOperand Op2 = MI.getOperand(2);
408 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
409 std::swap(Op1, Op2);
410 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
411 break; // We don't have an ADD fed by LI's that can be transformed
412
413 // Now we know that Op1 is the PHI node and Op2 is the dominator
414 unsigned DominatorReg = Op2.getReg();
415
416 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
417 ? &PPC::G8RC_and_G8RC_NOX0RegClass
418 : &PPC::GPRC_and_GPRC_NOR0RegClass;
419 MRI->setRegClass(DominatorReg, TRC);
420
421 // replace LIs with ADDIs
422 MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
423 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
424 MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
425 DEBUG(dbgs() << "Optimizing LI to ADDI: ");
426 DEBUG(LiMI->dump());
427
428 // There could be repeated registers in the PHI, e.g: %vreg1<def> =
429 // PHI %vreg6, <BB#2>, %vreg8, <BB#3>, %vreg8, <BB#6>; So if we've
430 // already replaced the def instruction, skip.
431 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
432 continue;
433
434 assert((LiMI->getOpcode() == PPC::LI ||
435 LiMI->getOpcode() == PPC::LI8) &&
436 "Invalid Opcode!");
437 auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
438 LiMI->RemoveOperand(1); // remove the imm of LI
439 LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
440 : PPC::ADDI8));
441 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
442 .addReg(DominatorReg)
443 .addImm(LiImm); // restore the imm of LI
444 DEBUG(LiMI->dump());
445 }
446
447 // Replace ADD with COPY
448 DEBUG(dbgs() << "Optimizing ADD to COPY: ");
449 DEBUG(MI.dump());
450 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
451 MI.getOperand(0).getReg())
452 .add(Op1);
453 ToErase = &MI;
454 Simplified = true;
455 NumOptADDLIs++;
456 break;
457 }
Bill Schmidt34af5e12015-11-10 21:38:26 +0000458 }
459 }
Tony Jiang2d9c5f3b2017-09-19 16:14:37 +0000460
Bill Schmidt34af5e12015-11-10 21:38:26 +0000461 // If the last instruction was marked for elimination,
462 // remove it now.
463 if (ToErase) {
464 ToErase->eraseFromParent();
465 ToErase = nullptr;
466 }
467 }
468
Hiroshi Inoue614453b2017-09-05 04:15:17 +0000469 // We try to eliminate redundant compare instruction.
470 Simplified |= eliminateRedundantCompare();
471
472 return Simplified;
473}
474
475// helper functions for eliminateRedundantCompare
476static bool isEqOrNe(MachineInstr *BI) {
477 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
478 unsigned PredCond = PPC::getPredicateCondition(Pred);
479 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
480}
481
482static bool isSupportedCmpOp(unsigned opCode) {
483 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
484 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
485 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
486 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
487}
488
489static bool is64bitCmpOp(unsigned opCode) {
490 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
491 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
492}
493
494static bool isSignedCmpOp(unsigned opCode) {
495 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
496 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
497}
498
499static unsigned getSignedCmpOpCode(unsigned opCode) {
500 if (opCode == PPC::CMPLD) return PPC::CMPD;
501 if (opCode == PPC::CMPLW) return PPC::CMPW;
502 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
503 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
504 return opCode;
505}
506
507// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
508// (LT x) to (LE x-1)
509static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
510 uint64_t Imm = CMPI->getOperand(2).getImm();
511 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
512 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
513 return 0;
514
515 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
516 unsigned PredCond = PPC::getPredicateCondition(Pred);
517 unsigned PredHint = PPC::getPredicateHint(Pred);
518 if (PredCond == PPC::PRED_GE)
519 return PPC::getPredicate(PPC::PRED_GT, PredHint);
520 if (PredCond == PPC::PRED_LT)
521 return PPC::getPredicate(PPC::PRED_LE, PredHint);
522
523 return 0;
524}
525
526// We can increment immediate x in (GT x) by changing it to (GE x+1) or
527// (LE x) to (LT x+1)
528static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
529 uint64_t Imm = CMPI->getOperand(2).getImm();
530 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
531 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
532 return 0;
533
534 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
535 unsigned PredCond = PPC::getPredicateCondition(Pred);
536 unsigned PredHint = PPC::getPredicateHint(Pred);
537 if (PredCond == PPC::PRED_GT)
538 return PPC::getPredicate(PPC::PRED_GE, PredHint);
539 if (PredCond == PPC::PRED_LE)
540 return PPC::getPredicate(PPC::PRED_LT, PredHint);
541
542 return 0;
543}
544
545static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
546 MachineRegisterInfo *MRI) {
547
548 auto isEligibleBB = [&](MachineBasicBlock &BB) {
549 auto BII = BB.getFirstInstrTerminator();
550 // We optimize BBs ending with a conditional branch.
551 // We check only for BCC here, not BCCLR, because BCCLR
552 // will be formed only later in the pipeline.
553 if (BB.succ_size() == 2 &&
554 BII != BB.instr_end() &&
555 (*BII).getOpcode() == PPC::BCC &&
556 (*BII).getOperand(1).isReg()) {
557 // We optimize only if the condition code is used only by one BCC.
558 unsigned CndReg = (*BII).getOperand(1).getReg();
559 if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
560 !MRI->hasOneNonDBGUse(CndReg))
561 return false;
562
563 // We skip this BB if a physical register is used in comparison.
564 MachineInstr *CMPI = MRI->getVRegDef(CndReg);
565 for (MachineOperand &MO : CMPI->operands())
566 if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
567 return false;
568
569 return true;
570 }
571 return false;
572 };
573
574 if (MBB.pred_size() != 1)
575 return false;
576
577 MachineBasicBlock *PredMBB = *MBB.pred_begin();
578 if (isEligibleBB(MBB) && isEligibleBB(*PredMBB))
579 return true;
580
581 return false;
582}
583
584// If multiple conditional branches are executed based on the (essentially)
585// same comparison, we merge compare instructions into one and make multiple
586// conditional branches on this comparison.
587// For example,
588// if (a == 0) { ... }
589// else if (a < 0) { ... }
590// can be executed by one compare and two conditional branches instead of
591// two pairs of a compare and a conditional branch.
592//
593// This method merges two compare instructions in two MBBs and modifies the
594// compare and conditional branch instructions if needed.
595// For the above example, the input for this pass looks like:
596// cmplwi r3, 0
597// beq 0, .LBB0_3
598// cmpwi r3, -1
599// bgt 0, .LBB0_4
600// So, before merging two compares, we need to modify these instructions as
601// cmpwi r3, 0 ; cmplwi and cmpwi yield same result for beq
602// beq 0, .LBB0_3
603// cmpwi r3, 0 ; greather than -1 means greater or equal to 0
604// bge 0, .LBB0_4
605
606bool PPCMIPeephole::eliminateRedundantCompare(void) {
607 bool Simplified = false;
608
609 for (MachineBasicBlock &MBB2 : *MF) {
610 // We only consider two basic blocks MBB1 and MBB2 if
611 // - both MBBs end with a conditional branch,
612 // - MBB1 is the only predecessor of MBB2, and
613 // - compare does not take a physical register as a operand in both MBBs.
614 if (!eligibleForCompareElimination(MBB2, MRI))
615 continue;
616
617 MachineBasicBlock *MBB1 = *MBB2.pred_begin();
618 MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
619 MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
620
621 MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
622 MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
623
624 // We cannot optimize an unsupported compare opcode or
625 // a mix of 32-bit and 64-bit comaprisons
626 if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
627 !isSupportedCmpOp(CMPI2->getOpcode()) ||
628 is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
629 continue;
630
631 unsigned NewOpCode = 0;
632 unsigned NewPredicate1 = 0, NewPredicate2 = 0;
633 int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
634
635 if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
636 // Typically, unsigned comparison is used for equality check, but
637 // we replace it with a signed comparison if the comparison
638 // to be merged is a signed comparison.
639 // In other cases of opcode mismatch, we cannot optimize this.
640 if (isEqOrNe(BI2) &&
641 CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
642 NewOpCode = CMPI1->getOpcode();
643 else if (isEqOrNe(BI1) &&
644 getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
645 NewOpCode = CMPI2->getOpcode();
646 else continue;
647 }
648
649 if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
650 // In case of comparisons between two registers, these two registers
651 // must be same to merge two comparisons.
652 unsigned Cmp1Operand1 = CMPI1->getOperand(1).getReg();
653 unsigned Cmp1Operand2 = CMPI1->getOperand(2).getReg();
654 unsigned Cmp2Operand1 = CMPI2->getOperand(1).getReg();
655 unsigned Cmp2Operand2 = CMPI2->getOperand(2).getReg();
656 if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
657 // Same pair of registers in the same order; ready to merge as is.
658 }
659 else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
660 // Same pair of registers in different order.
661 // We reverse the predicate to merge compare instructions.
662 PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
663 NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
664 }
665 else continue;
666 }
667 else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()){
668 // In case of comparisons between a register and an immediate,
669 // the operand register must be same for two compare instructions.
670 if (CMPI1->getOperand(1).getReg() != CMPI2->getOperand(1).getReg())
671 continue;
672
673 NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
674 NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
675
676 // If immediate are not same, we try to adjust by changing predicate;
677 // e.g. GT imm means GE (imm+1).
678 if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
679 int Diff = Imm1 - Imm2;
680 if (Diff < -2 || Diff > 2)
681 continue;
682
683 unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
684 unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
685 unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
686 unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
687 if (Diff == 2) {
688 if (PredToInc2 && PredToDec1) {
689 NewPredicate2 = PredToInc2;
690 NewPredicate1 = PredToDec1;
691 NewImm2++;
692 NewImm1--;
693 }
694 }
695 else if (Diff == 1) {
696 if (PredToInc2) {
697 NewImm2++;
698 NewPredicate2 = PredToInc2;
699 }
700 else if (PredToDec1) {
701 NewImm1--;
702 NewPredicate1 = PredToDec1;
703 }
704 }
705 else if (Diff == -1) {
706 if (PredToDec2) {
707 NewImm2--;
708 NewPredicate2 = PredToDec2;
709 }
710 else if (PredToInc1) {
711 NewImm1++;
712 NewPredicate1 = PredToInc1;
713 }
714 }
715 else if (Diff == -2) {
716 if (PredToDec2 && PredToInc1) {
717 NewPredicate2 = PredToDec2;
718 NewPredicate1 = PredToInc1;
719 NewImm2--;
720 NewImm1++;
721 }
722 }
723 }
724
725 // We cannnot merge two compares if the immediates are not same.
726 if (NewImm2 != NewImm1)
727 continue;
728 }
729
730 DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
731 DEBUG(CMPI1->dump());
732 DEBUG(BI1->dump());
733 DEBUG(CMPI2->dump());
734 DEBUG(BI2->dump());
735
736 // We adjust opcode, predicates and immediate as we determined above.
737 if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
738 CMPI1->setDesc(TII->get(NewOpCode));
739 }
740 if (NewPredicate1) {
741 BI1->getOperand(0).setImm(NewPredicate1);
742 }
743 if (NewPredicate2) {
744 BI2->getOperand(0).setImm(NewPredicate2);
745 }
746 if (NewImm1 != Imm1) {
747 CMPI1->getOperand(2).setImm(NewImm1);
748 }
749
750 // We finally eliminate compare instruction in MBB2.
751 BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
752 BI2->getOperand(1).setIsKill(true);
753 BI1->getOperand(1).setIsKill(false);
754 CMPI2->eraseFromParent();
755
756 DEBUG(dbgs() << "into a compare and two branches:\n");
757 DEBUG(CMPI1->dump());
758 DEBUG(BI1->dump());
759 DEBUG(BI2->dump());
760
761 Simplified = true;
762 }
763
Bill Schmidt34af5e12015-11-10 21:38:26 +0000764 return Simplified;
765}
766
767// This is used to find the "true" source register for an
768// XXPERMDI instruction, since MachineCSE does not handle the
769// "copy-like" operations (Copy and SubregToReg). Returns
770// the original SrcReg unless it is the target of a copy-like
771// operation, in which case we chain backwards through all
772// such operations to the ultimate source register. If a
773// physical register is encountered, we stop the search.
774unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
775
776 while (true) {
777
778 MachineInstr *MI = MRI->getVRegDef(SrcReg);
779 if (!MI->isCopyLike())
780 return SrcReg;
781
782 unsigned CopySrcReg;
783 if (MI->isCopy())
784 CopySrcReg = MI->getOperand(1).getReg();
785 else {
786 assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
787 CopySrcReg = MI->getOperand(2).getReg();
788 }
789
790 if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
791 return CopySrcReg;
792
793 SrcReg = CopySrcReg;
794 }
795}
796
797} // end default namespace
798
799INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
800 "PowerPC MI Peephole Optimization", false, false)
801INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
802 "PowerPC MI Peephole Optimization", false, false)
803
804char PPCMIPeephole::ID = 0;
805FunctionPass*
806llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
807