blob: 5c50d02771fd3aacfa219fb39ebd863ac60bd9b9 [file] [log] [blame]
Bill Schmidt34af5e12015-11-10 21:38:26 +00001//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===---------------------------------------------------------------------===//
9//
10// This pass performs peephole optimizations to clean up ugly code
11// sequences at the MachineInstruction layer. It runs at the end of
12// the SSA phases, following VSX swap removal. A pass of dead code
13// elimination follows this one for quick clean-up of any dead
14// instructions introduced here. Although we could do this as callbacks
15// from the generic peephole pass, this would have a couple of bad
16// effects: it might remove optimization opportunities for VSX swap
17// removal, and it would miss cleanups made possible following VSX
18// swap removal.
19//
20//===---------------------------------------------------------------------===//
21
Bill Schmidt34af5e12015-11-10 21:38:26 +000022#include "PPC.h"
23#include "PPCInstrBuilder.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000024#include "PPCInstrInfo.h"
Bill Schmidt34af5e12015-11-10 21:38:26 +000025#include "PPCTargetMachine.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/Support/Debug.h"
Hiroshi Inoue614453b2017-09-05 04:15:17 +000030#include "MCTargetDesc/PPCPredicates.h"
Bill Schmidt34af5e12015-11-10 21:38:26 +000031
32using namespace llvm;
33
34#define DEBUG_TYPE "ppc-mi-peepholes"
35
36namespace llvm {
37 void initializePPCMIPeepholePass(PassRegistry&);
38}
39
40namespace {
41
struct PPCMIPeephole : public MachineFunctionPass {

  static char ID;
  const PPCInstrInfo *TII;   // Target instruction info; set by initialize().
  MachineFunction *MF;       // Function currently being optimized.
  MachineRegisterInfo *MRI;  // Register info for MF (code is in SSA form here).

  PPCMIPeephole() : MachineFunctionPass(ID) {
    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  // Perform peepholes.
  bool simplifyCode(void);

  // Merge pairs of (essentially) identical compares feeding conditional
  // branches in adjacent blocks into a single compare.
  bool eliminateRedundantCompare(void);

  // Find the "true" register represented by SrcReg (following chains
  // of copies and subreg_to_reg operations).
  unsigned lookThruCopyLike(unsigned SrcReg);

public:
  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(*MF.getFunction()))
      return false;
    initialize(MF);
    return simplifyCode();
  }
};
76
// Initialize class variables.
// Caches the function, its register info, and the subtarget's instruction
// info so the per-instruction peepholes don't re-fetch them.
void PPCMIPeephole::initialize(MachineFunction &MFParm) {
  MF = &MFParm;
  MRI = &MF->getRegInfo();
  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
  DEBUG(MF->dump());
}
85
86// Perform peephole optimizations.
87bool PPCMIPeephole::simplifyCode(void) {
88 bool Simplified = false;
89 MachineInstr* ToErase = nullptr;
90
91 for (MachineBasicBlock &MBB : *MF) {
92 for (MachineInstr &MI : MBB) {
93
94 // If the previous instruction was marked for elimination,
95 // remove it now.
96 if (ToErase) {
97 ToErase->eraseFromParent();
98 ToErase = nullptr;
99 }
100
101 // Ignore debug instructions.
102 if (MI.isDebugValue())
103 continue;
104
105 // Per-opcode peepholes.
106 switch (MI.getOpcode()) {
107
108 default:
109 break;
110
111 case PPC::XXPERMDI: {
112 // Perform simplifications of 2x64 vector swaps and splats.
113 // A swap is identified by an immediate value of 2, and a splat
114 // is identified by an immediate value of 0 or 3.
115 int Immed = MI.getOperand(3).getImm();
116
117 if (Immed != 1) {
118
119 // For each of these simplifications, we need the two source
120 // regs to match. Unfortunately, MachineCSE ignores COPY and
121 // SUBREG_TO_REG, so for example we can see
122 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
123 // We have to look through chains of COPY and SUBREG_TO_REG
124 // to find the real source values for comparison.
125 unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
126 unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
127
128 if (TrueReg1 == TrueReg2
129 && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
130 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000131 unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
132
133 // If this is a splat fed by a splatting load, the splat is
134 // redundant. Replace with a copy. This doesn't happen directly due
135 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
136 // a load of a double to a vector of 64-bit integers.
137 auto isConversionOfLoadAndSplat = [=]() -> bool {
138 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
139 return false;
140 unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
141 if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
142 MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
143 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
144 return true;
145 }
146 return false;
147 };
148 if (DefMI && (Immed == 0 || Immed == 3)) {
149 if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
150 DEBUG(dbgs()
151 << "Optimizing load-and-splat/splat "
152 "to load-and-splat/copy: ");
153 DEBUG(MI.dump());
Diana Picus116bbab2017-01-13 09:58:52 +0000154 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
155 MI.getOperand(0).getReg())
156 .add(MI.getOperand(1));
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000157 ToErase = &MI;
158 Simplified = true;
159 }
160 }
Bill Schmidt34af5e12015-11-10 21:38:26 +0000161
162 // If this is a splat or a swap fed by another splat, we
163 // can replace it with a copy.
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000164 if (DefOpc == PPC::XXPERMDI) {
Bill Schmidt34af5e12015-11-10 21:38:26 +0000165 unsigned FeedImmed = DefMI->getOperand(3).getImm();
166 unsigned FeedReg1
167 = lookThruCopyLike(DefMI->getOperand(1).getReg());
168 unsigned FeedReg2
169 = lookThruCopyLike(DefMI->getOperand(2).getReg());
170
171 if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
172 DEBUG(dbgs()
173 << "Optimizing splat/swap or splat/splat "
174 "to splat/copy: ");
175 DEBUG(MI.dump());
Diana Picus116bbab2017-01-13 09:58:52 +0000176 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
177 MI.getOperand(0).getReg())
178 .add(MI.getOperand(1));
Bill Schmidt34af5e12015-11-10 21:38:26 +0000179 ToErase = &MI;
180 Simplified = true;
181 }
182
183 // If this is a splat fed by a swap, we can simplify modify
184 // the splat to splat the other value from the swap's input
185 // parameter.
186 else if ((Immed == 0 || Immed == 3)
187 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
188 DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
189 DEBUG(MI.dump());
190 MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
191 MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
192 MI.getOperand(3).setImm(3 - Immed);
193 Simplified = true;
194 }
195
196 // If this is a swap fed by a swap, we can replace it
197 // with a copy from the first swap's input.
198 else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
199 DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
200 DEBUG(MI.dump());
Diana Picus116bbab2017-01-13 09:58:52 +0000201 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
202 MI.getOperand(0).getReg())
203 .add(DefMI->getOperand(1));
Bill Schmidt34af5e12015-11-10 21:38:26 +0000204 ToErase = &MI;
205 Simplified = true;
206 }
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000207 } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
208 (DefMI->getOperand(2).getImm() == 0 ||
209 DefMI->getOperand(2).getImm() == 3)) {
Nemanja Ivanovic11049f82016-10-04 06:59:23 +0000210 // Splat fed by another splat - switch the output of the first
211 // and remove the second.
212 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
213 ToErase = &MI;
214 Simplified = true;
215 DEBUG(dbgs() << "Removing redundant splat: ");
216 DEBUG(MI.dump());
Bill Schmidt34af5e12015-11-10 21:38:26 +0000217 }
218 }
219 }
220 break;
221 }
Nemanja Ivanovic11049f82016-10-04 06:59:23 +0000222 case PPC::VSPLTB:
223 case PPC::VSPLTH:
224 case PPC::XXSPLTW: {
225 unsigned MyOpcode = MI.getOpcode();
226 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
227 unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000228 if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
229 break;
Nemanja Ivanovic11049f82016-10-04 06:59:23 +0000230 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
231 if (!DefMI)
232 break;
233 unsigned DefOpcode = DefMI->getOpcode();
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000234 auto isConvertOfSplat = [=]() -> bool {
235 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
236 return false;
237 unsigned ConvReg = DefMI->getOperand(1).getReg();
238 if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
239 return false;
240 MachineInstr *Splt = MRI->getVRegDef(ConvReg);
241 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
242 Splt->getOpcode() == PPC::XXSPLTW);
243 };
244 bool AlreadySplat = (MyOpcode == DefOpcode) ||
Nemanja Ivanovic11049f82016-10-04 06:59:23 +0000245 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
246 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000247 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
248 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
249 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
250 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
251 // If the instruction[s] that feed this splat have already splat
252 // the value, this splat is redundant.
253 if (AlreadySplat) {
Tim Shen4ff62b12016-10-12 00:48:25 +0000254 DEBUG(dbgs() << "Changing redundant splat to a copy: ");
255 DEBUG(MI.dump());
256 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
257 MI.getOperand(0).getReg())
Diana Picus116bbab2017-01-13 09:58:52 +0000258 .add(MI.getOperand(OpNo));
Nemanja Ivanovic11049f82016-10-04 06:59:23 +0000259 ToErase = &MI;
260 Simplified = true;
Nemanja Ivanovic11049f82016-10-04 06:59:23 +0000261 }
262 // Splat fed by a shift. Usually when we align value to splat into
263 // vector element zero.
264 if (DefOpcode == PPC::XXSLDWI) {
265 unsigned ShiftRes = DefMI->getOperand(0).getReg();
266 unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
267 unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
268 unsigned ShiftImm = DefMI->getOperand(3).getImm();
269 unsigned SplatImm = MI.getOperand(2).getImm();
270 if (ShiftOp1 == ShiftOp2) {
271 unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
272 if (MRI->hasOneNonDBGUse(ShiftRes)) {
273 DEBUG(dbgs() << "Removing redundant shift: ");
274 DEBUG(DefMI->dump());
275 ToErase = DefMI;
276 }
277 Simplified = true;
278 DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
279 " to " << NewElem << " in instruction: ");
280 DEBUG(MI.dump());
281 MI.getOperand(1).setReg(ShiftOp1);
282 MI.getOperand(2).setImm(NewElem);
283 }
284 }
285 break;
286 }
Nemanja Ivanovic15748f42016-12-06 11:47:14 +0000287 case PPC::XVCVDPSP: {
288 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
289 unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
290 if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
291 break;
292 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
293
294 // This can occur when building a vector of single precision or integer
295 // values.
296 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
297 unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
298 unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
299 if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
300 !TargetRegisterInfo::isVirtualRegister(DefsReg2))
301 break;
302 MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
303 MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
304
305 if (!P1 || !P2)
306 break;
307
308 // Remove the passed FRSP instruction if it only feeds this MI and
309 // set any uses of that FRSP (in this MI) to the source of the FRSP.
310 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
311 if (RoundInstr->getOpcode() == PPC::FRSP &&
312 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
313 Simplified = true;
314 unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
315 unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
316 MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
317 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
318 if (Use.getOperand(i).isReg() &&
319 Use.getOperand(i).getReg() == FRSPDefines)
320 Use.getOperand(i).setReg(ConvReg1);
321 DEBUG(dbgs() << "Removing redundant FRSP:\n");
322 DEBUG(RoundInstr->dump());
323 DEBUG(dbgs() << "As it feeds instruction:\n");
324 DEBUG(MI.dump());
325 DEBUG(dbgs() << "Through instruction:\n");
326 DEBUG(DefMI->dump());
327 RoundInstr->eraseFromParent();
328 }
329 };
330
331 // If the input to XVCVDPSP is a vector that was built (even
332 // partially) out of FRSP's, the FRSP(s) can safely be removed
333 // since this instruction performs the same operation.
334 if (P1 != P2) {
335 removeFRSPIfPossible(P1);
336 removeFRSPIfPossible(P2);
337 break;
338 }
339 removeFRSPIfPossible(P1);
340 }
341 break;
342 }
Bill Schmidt34af5e12015-11-10 21:38:26 +0000343 }
344 }
Bill Schmidt34af5e12015-11-10 21:38:26 +0000345 // If the last instruction was marked for elimination,
346 // remove it now.
347 if (ToErase) {
348 ToErase->eraseFromParent();
349 ToErase = nullptr;
350 }
351 }
352
Hiroshi Inoue614453b2017-09-05 04:15:17 +0000353 // We try to eliminate redundant compare instruction.
354 Simplified |= eliminateRedundantCompare();
355
356 return Simplified;
357}
358
359// helper functions for eliminateRedundantCompare
360static bool isEqOrNe(MachineInstr *BI) {
361 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
362 unsigned PredCond = PPC::getPredicateCondition(Pred);
363 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
364}
365
366static bool isSupportedCmpOp(unsigned opCode) {
367 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
368 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
369 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
370 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
371}
372
373static bool is64bitCmpOp(unsigned opCode) {
374 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
375 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
376}
377
378static bool isSignedCmpOp(unsigned opCode) {
379 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
380 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
381}
382
383static unsigned getSignedCmpOpCode(unsigned opCode) {
384 if (opCode == PPC::CMPLD) return PPC::CMPD;
385 if (opCode == PPC::CMPLW) return PPC::CMPW;
386 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
387 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
388 return opCode;
389}
390
391// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
392// (LT x) to (LE x-1)
393static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
394 uint64_t Imm = CMPI->getOperand(2).getImm();
395 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
396 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
397 return 0;
398
399 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
400 unsigned PredCond = PPC::getPredicateCondition(Pred);
401 unsigned PredHint = PPC::getPredicateHint(Pred);
402 if (PredCond == PPC::PRED_GE)
403 return PPC::getPredicate(PPC::PRED_GT, PredHint);
404 if (PredCond == PPC::PRED_LT)
405 return PPC::getPredicate(PPC::PRED_LE, PredHint);
406
407 return 0;
408}
409
410// We can increment immediate x in (GT x) by changing it to (GE x+1) or
411// (LE x) to (LT x+1)
412static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
413 uint64_t Imm = CMPI->getOperand(2).getImm();
414 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
415 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
416 return 0;
417
418 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
419 unsigned PredCond = PPC::getPredicateCondition(Pred);
420 unsigned PredHint = PPC::getPredicateHint(Pred);
421 if (PredCond == PPC::PRED_GT)
422 return PPC::getPredicate(PPC::PRED_GE, PredHint);
423 if (PredCond == PPC::PRED_LE)
424 return PPC::getPredicate(PPC::PRED_LT, PredHint);
425
426 return 0;
427}
428
// Return true if MBB and its single predecessor form a candidate pair for
// compare elimination: both blocks must end in a BCC whose condition
// register is virtual, has exactly one non-debug use, and is defined by a
// compare that uses no physical registers.
static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
                                          MachineRegisterInfo *MRI) {

  auto isEligibleBB = [&](MachineBasicBlock &BB) {
    auto BII = BB.getFirstInstrTerminator();
    // We optimize BBs ending with a conditional branch.
    // We check only for BCC here, not BCCLR, because BCCLR
    // will be formed only later in the pipeline.
    if (BB.succ_size() == 2 &&
        BII != BB.instr_end() &&
        (*BII).getOpcode() == PPC::BCC &&
        (*BII).getOperand(1).isReg()) {
      // We optimize only if the condition code is used only by one BCC.
      unsigned CndReg = (*BII).getOperand(1).getReg();
      if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
          !MRI->hasOneNonDBGUse(CndReg))
        return false;

      // We skip this BB if a physical register is used in comparison.
      MachineInstr *CMPI = MRI->getVRegDef(CndReg);
      for (MachineOperand &MO : CMPI->operands())
        if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
          return false;

      return true;
    }
    return false;
  };

  // MBB must have exactly one predecessor so the two branches are
  // guaranteed to execute back-to-back.
  if (MBB.pred_size() != 1)
    return false;

  MachineBasicBlock *PredMBB = *MBB.pred_begin();
  if (isEligibleBB(MBB) && isEligibleBB(*PredMBB))
    return true;

  return false;
}
467
468// If multiple conditional branches are executed based on the (essentially)
469// same comparison, we merge compare instructions into one and make multiple
470// conditional branches on this comparison.
471// For example,
472// if (a == 0) { ... }
473// else if (a < 0) { ... }
474// can be executed by one compare and two conditional branches instead of
475// two pairs of a compare and a conditional branch.
476//
477// This method merges two compare instructions in two MBBs and modifies the
478// compare and conditional branch instructions if needed.
479// For the above example, the input for this pass looks like:
480// cmplwi r3, 0
481// beq 0, .LBB0_3
482// cmpwi r3, -1
483// bgt 0, .LBB0_4
484// So, before merging two compares, we need to modify these instructions as
485// cmpwi r3, 0 ; cmplwi and cmpwi yield same result for beq
486// beq 0, .LBB0_3
487// cmpwi r3, 0 ; greather than -1 means greater or equal to 0
488// bge 0, .LBB0_4
489
bool PPCMIPeephole::eliminateRedundantCompare(void) {
  bool Simplified = false;

  for (MachineBasicBlock &MBB2 : *MF) {
    // We only consider two basic blocks MBB1 and MBB2 if
    // - both MBBs end with a conditional branch,
    // - MBB1 is the only predecessor of MBB2, and
    // - compare does not take a physical register as an operand in both MBBs.
    if (!eligibleForCompareElimination(MBB2, MRI))
      continue;

    MachineBasicBlock *MBB1 = *MBB2.pred_begin();
    MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
    MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());

    MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
    MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());

    // We cannot optimize an unsupported compare opcode or
    // a mix of 32-bit and 64-bit comparisons
    if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
        !isSupportedCmpOp(CMPI2->getOpcode()) ||
        is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
      continue;

    // All adjustments are computed first and applied only after every
    // legality check passes, so bailing out with "continue" leaves the
    // function unmodified.
    unsigned NewOpCode = 0;
    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;

    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
      // Typically, unsigned comparison is used for equality check, but
      // we replace it with a signed comparison if the comparison
      // to be merged is a signed comparison.
      // In other cases of opcode mismatch, we cannot optimize this.
      if (isEqOrNe(BI2) &&
          CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
        NewOpCode = CMPI1->getOpcode();
      else if (isEqOrNe(BI1) &&
               getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
        NewOpCode = CMPI2->getOpcode();
      else continue;
    }

    if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
      // In case of comparisons between two registers, these two registers
      // must be same to merge two comparisons.
      unsigned Cmp1Operand1 = CMPI1->getOperand(1).getReg();
      unsigned Cmp1Operand2 = CMPI1->getOperand(2).getReg();
      unsigned Cmp2Operand1 = CMPI2->getOperand(1).getReg();
      unsigned Cmp2Operand2 = CMPI2->getOperand(2).getReg();
      if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
        // Same pair of registers in the same order; ready to merge as is.
      }
      else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
        // Same pair of registers in different order.
        // We reverse the predicate to merge compare instructions.
        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
      }
      else continue;
    }
    else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()){
      // In case of comparisons between a register and an immediate,
      // the operand register must be same for two compare instructions.
      if (CMPI1->getOperand(1).getReg() != CMPI2->getOperand(1).getReg())
        continue;

      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();

      // If immediate are not same, we try to adjust by changing predicate;
      // e.g. GT imm means GE (imm+1).
      if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
        int Diff = Imm1 - Imm2;
        // The adjustment helpers below can move each immediate by at most
        // one, so a difference larger than 2 can never be reconciled.
        if (Diff < -2 || Diff > 2)
          continue;

        unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
        unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
        unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
        unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
        if (Diff == 2) {
          if (PredToInc2 && PredToDec1) {
            NewPredicate2 = PredToInc2;
            NewPredicate1 = PredToDec1;
            NewImm2++;
            NewImm1--;
          }
        }
        else if (Diff == 1) {
          if (PredToInc2) {
            NewImm2++;
            NewPredicate2 = PredToInc2;
          }
          else if (PredToDec1) {
            NewImm1--;
            NewPredicate1 = PredToDec1;
          }
        }
        else if (Diff == -1) {
          if (PredToDec2) {
            NewImm2--;
            NewPredicate2 = PredToDec2;
          }
          else if (PredToInc1) {
            NewImm1++;
            NewPredicate1 = PredToInc1;
          }
        }
        else if (Diff == -2) {
          if (PredToDec2 && PredToInc1) {
            NewPredicate2 = PredToDec2;
            NewPredicate1 = PredToInc1;
            NewImm2--;
            NewImm1++;
          }
        }
      }

      // We cannot merge two compares if the immediates are not same.
      if (NewImm2 != NewImm1)
        continue;
    }

    DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
    DEBUG(CMPI1->dump());
    DEBUG(BI1->dump());
    DEBUG(CMPI2->dump());
    DEBUG(BI2->dump());

    // We adjust opcode, predicates and immediate as we determined above.
    if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
      CMPI1->setDesc(TII->get(NewOpCode));
    }
    if (NewPredicate1) {
      BI1->getOperand(0).setImm(NewPredicate1);
    }
    if (NewPredicate2) {
      BI2->getOperand(0).setImm(NewPredicate2);
    }
    if (NewImm1 != Imm1) {
      CMPI1->getOperand(2).setImm(NewImm1);
    }

    // We finally eliminate compare instruction in MBB2.
    // Both branches now read CMPI1's result; kill flags move accordingly.
    BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
    BI2->getOperand(1).setIsKill(true);
    BI1->getOperand(1).setIsKill(false);
    CMPI2->eraseFromParent();

    DEBUG(dbgs() << "into a compare and two branches:\n");
    DEBUG(CMPI1->dump());
    DEBUG(BI1->dump());
    DEBUG(BI2->dump());

    Simplified = true;
  }

  return Simplified;
}
650
// This is used to find the "true" source register for an
// XXPERMDI instruction, since MachineCSE does not handle the
// "copy-like" operations (Copy and SubregToReg). Returns
// the original SrcReg unless it is the target of a copy-like
// operation, in which case we chain backwards through all
// such operations to the ultimate source register. If a
// physical register is encountered, we stop the search.
unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {

  while (true) {

    // NOTE(review): assumes SrcReg is virtual with a def here (SSA form);
    // getVRegDef of an undefined register would yield null and MI would be
    // dereferenced - confirm callers never pass an undefined register.
    MachineInstr *MI = MRI->getVRegDef(SrcReg);
    if (!MI->isCopyLike())
      return SrcReg;

    unsigned CopySrcReg;
    if (MI->isCopy())
      CopySrcReg = MI->getOperand(1).getReg();
    else {
      // SUBREG_TO_REG places its source in operand 2 (operand 1 is the
      // implicit-zero immediate).
      assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
      CopySrcReg = MI->getOperand(2).getReg();
    }

    // Stop at a physical register; we cannot follow its definition.
    if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
      return CopySrcReg;

    SrcReg = CopySrcReg;
  }
}
680
} // end anonymous namespace
682
// Register the pass with the legacy pass manager under DEBUG_TYPE
// ("ppc-mi-peepholes").
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                      "PowerPC MI Peephole Optimization", false, false)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                    "PowerPC MI Peephole Optimization", false, false)

char PPCMIPeephole::ID = 0;
// Factory used by the PPC target machine to add this pass to the pipeline.
FunctionPass*
llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
691