//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The QPX vector registers overlay the scalar floating-point registers, and
// any scalar floating-point loads splat their value across all vector lanes.
// Thus, if we have a scalar load followed by a splat, we can remove the splat
// (i.e. replace the load with a load-and-splat pseudo instruction).
//
// This pass must run after anything that might do store-to-load forwarding.
//
//===----------------------------------------------------------------------===//
|  | 18 |  | 
|  | 19 | #include "PPC.h" | 
|  | 20 | #include "PPCInstrBuilder.h" | 
|  | 21 | #include "PPCInstrInfo.h" | 
|  | 22 | #include "llvm/ADT/SmallVector.h" | 
|  | 23 | #include "llvm/ADT/Statistic.h" | 
|  | 24 | #include "llvm/CodeGen/MachineFunctionPass.h" | 
| David Blaikie | b3bde2e | 2017-11-17 01:07:10 +0000 | [diff] [blame] | 25 | #include "llvm/CodeGen/TargetSubtargetInfo.h" | 
| Hal Finkel | fc35391 | 2016-03-31 20:39:41 +0000 | [diff] [blame] | 26 | #include "llvm/Support/MathExtras.h" | 
|  | 27 | #include "llvm/Target/TargetMachine.h" | 
| Hal Finkel | fc35391 | 2016-03-31 20:39:41 +0000 | [diff] [blame] | 28 | using namespace llvm; | 
|  | 29 |  | 
|  | 30 | #define DEBUG_TYPE "ppc-qpx-load-splat" | 
|  | 31 |  | 
|  | 32 | STATISTIC(NumSimplified, "Number of QPX load splats simplified"); | 
|  | 33 |  | 
|  | 34 | namespace llvm { | 
|  | 35 | void initializePPCQPXLoadSplatPass(PassRegistry&); | 
|  | 36 | } | 
|  | 37 |  | 
|  | 38 | namespace { | 
|  | 39 | struct PPCQPXLoadSplat : public MachineFunctionPass { | 
|  | 40 | static char ID; | 
|  | 41 | PPCQPXLoadSplat() : MachineFunctionPass(ID) { | 
|  | 42 | initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry()); | 
|  | 43 | } | 
|  | 44 |  | 
|  | 45 | bool runOnMachineFunction(MachineFunction &Fn) override; | 
|  | 46 |  | 
| Mehdi Amini | 117296c | 2016-10-01 02:56:57 +0000 | [diff] [blame] | 47 | StringRef getPassName() const override { | 
| Hal Finkel | fc35391 | 2016-03-31 20:39:41 +0000 | [diff] [blame] | 48 | return "PowerPC QPX Load Splat Simplification"; | 
|  | 49 | } | 
|  | 50 | }; | 
|  | 51 | char PPCQPXLoadSplat::ID = 0; | 
|  | 52 | } | 
|  | 53 |  | 
|  | 54 | INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat", | 
|  | 55 | "PowerPC QPX Load Splat Simplification", | 
|  | 56 | false, false) | 
|  | 57 |  | 
|  | 58 | FunctionPass *llvm::createPPCQPXLoadSplatPass() { | 
|  | 59 | return new PPCQPXLoadSplat(); | 
|  | 60 | } | 
|  | 61 |  | 
|  | 62 | bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) { | 
| Matthias Braun | f1caa28 | 2017-12-15 22:22:58 +0000 | [diff] [blame] | 63 | if (skipFunction(MF.getFunction())) | 
| Andrew Kaylor | 289bd5f | 2016-04-27 19:39:32 +0000 | [diff] [blame] | 64 | return false; | 
|  | 65 |  | 
| Hal Finkel | fc35391 | 2016-03-31 20:39:41 +0000 | [diff] [blame] | 66 | bool MadeChange = false; | 
|  | 67 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); | 
|  | 68 |  | 
|  | 69 | for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) { | 
|  | 70 | MachineBasicBlock *MBB = &*MFI; | 
|  | 71 | SmallVector<MachineInstr *, 4> Splats; | 
|  | 72 |  | 
|  | 73 | for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) { | 
|  | 74 | MachineInstr *MI = &*MBBI; | 
|  | 75 |  | 
|  | 76 | if (MI->hasUnmodeledSideEffects() || MI->isCall()) { | 
|  | 77 | Splats.clear(); | 
|  | 78 | continue; | 
|  | 79 | } | 
|  | 80 |  | 
|  | 81 | // We're looking for a sequence like this: | 
| Francis Visoiu Mistrih | a8a83d1 | 2017-12-07 10:40:31 +0000 | [diff] [blame] | 82 | // %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2) | 
|  | 83 | // %qf1 = QVESPLATI killed %qf0, 0, implicit %rm | 
| Hal Finkel | fc35391 | 2016-03-31 20:39:41 +0000 | [diff] [blame] | 84 |  | 
|  | 85 | for (auto SI = Splats.begin(); SI != Splats.end();) { | 
|  | 86 | MachineInstr *SMI = *SI; | 
|  | 87 | unsigned SplatReg = SMI->getOperand(0).getReg(); | 
|  | 88 | unsigned SrcReg = SMI->getOperand(1).getReg(); | 
|  | 89 |  | 
|  | 90 | if (MI->modifiesRegister(SrcReg, TRI)) { | 
|  | 91 | switch (MI->getOpcode()) { | 
|  | 92 | default: | 
|  | 93 | SI = Splats.erase(SI); | 
|  | 94 | continue; | 
|  | 95 | case PPC::LFS: | 
|  | 96 | case PPC::LFD: | 
|  | 97 | case PPC::LFSU: | 
|  | 98 | case PPC::LFDU: | 
|  | 99 | case PPC::LFSUX: | 
|  | 100 | case PPC::LFDUX: | 
|  | 101 | case PPC::LFSX: | 
|  | 102 | case PPC::LFDX: | 
|  | 103 | case PPC::LFIWAX: | 
|  | 104 | case PPC::LFIWZX: | 
|  | 105 | if (SplatReg != SrcReg) { | 
|  | 106 | // We need to change the load to define the scalar subregister of | 
|  | 107 | // the QPX splat source register. | 
|  | 108 | unsigned SubRegIndex = | 
|  | 109 | TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg()); | 
|  | 110 | unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex); | 
|  | 111 |  | 
|  | 112 | // Substitute both the explicit defined register, and also the | 
|  | 113 | // implicit def of the containing QPX register. | 
|  | 114 | MI->getOperand(0).setReg(SplatSubReg); | 
|  | 115 | MI->substituteRegister(SrcReg, SplatReg, 0, *TRI); | 
|  | 116 | } | 
|  | 117 |  | 
|  | 118 | SI = Splats.erase(SI); | 
|  | 119 |  | 
|  | 120 | // If SMI is directly after MI, then MBBI's base iterator is | 
|  | 121 | // pointing at SMI.  Adjust MBBI around the call to erase SMI to | 
|  | 122 | // avoid invalidating MBBI. | 
|  | 123 | ++MBBI; | 
|  | 124 | SMI->eraseFromParent(); | 
|  | 125 | --MBBI; | 
|  | 126 |  | 
|  | 127 | ++NumSimplified; | 
|  | 128 | MadeChange = true; | 
|  | 129 | continue; | 
|  | 130 | } | 
|  | 131 | } | 
|  | 132 |  | 
| NAKAMURA Takumi | fe1202c | 2016-06-20 00:37:41 +0000 | [diff] [blame] | 133 | // If this instruction defines the splat register, then we cannot move | 
|  | 134 | // the previous definition above it. If it reads from the splat | 
|  | 135 | // register, then it must already be alive from some previous | 
|  | 136 | // definition, and if the splat register is different from the source | 
|  | 137 | // register, then this definition must not be the load for which we're | 
|  | 138 | // searching. | 
| Hal Finkel | 17e9754 | 2016-04-30 01:59:28 +0000 | [diff] [blame] | 139 | if (MI->modifiesRegister(SplatReg, TRI) || | 
|  | 140 | (SrcReg != SplatReg && | 
|  | 141 | MI->readsRegister(SplatReg, TRI))) { | 
| Hal Finkel | fc35391 | 2016-03-31 20:39:41 +0000 | [diff] [blame] | 142 | SI = Splats.erase(SI); | 
|  | 143 | continue; | 
|  | 144 | } | 
|  | 145 |  | 
|  | 146 | ++SI; | 
|  | 147 | } | 
|  | 148 |  | 
|  | 149 | if (MI->getOpcode() != PPC::QVESPLATI && | 
|  | 150 | MI->getOpcode() != PPC::QVESPLATIs && | 
|  | 151 | MI->getOpcode() != PPC::QVESPLATIb) | 
|  | 152 | continue; | 
|  | 153 | if (MI->getOperand(2).getImm() != 0) | 
|  | 154 | continue; | 
|  | 155 |  | 
|  | 156 | // If there are other uses of the scalar value after this, replacing | 
|  | 157 | // those uses might be non-trivial. | 
|  | 158 | if (!MI->getOperand(1).isKill()) | 
|  | 159 | continue; | 
|  | 160 |  | 
|  | 161 | Splats.push_back(MI); | 
|  | 162 | } | 
|  | 163 | } | 
|  | 164 |  | 
|  | 165 | return MadeChange; | 
|  | 166 | } |