blob: fc6a0439c559e86d1aa483f93d6a015d40e73676 [file] [log] [blame]
//===---- ARM64StorePairSuppress.cpp --- Suppress store pair formation ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass identifies floating point stores that should not be combined into
// store pairs. Later we may do the same for floating point loads.
// ===---------------------------------------------------------------------===//
13
14#define DEBUG_TYPE "arm64-stp-suppress"
15#include "ARM64InstrInfo.h"
16#include "llvm/CodeGen/MachineFunction.h"
17#include "llvm/CodeGen/MachineFunctionPass.h"
18#include "llvm/CodeGen/MachineInstr.h"
19#include "llvm/CodeGen/MachineTraceMetrics.h"
20#include "llvm/Target/TargetInstrInfo.h"
21#include "llvm/CodeGen/TargetSchedule.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/raw_ostream.h"
24
25using namespace llvm;
26
27namespace {
28class ARM64StorePairSuppress : public MachineFunctionPass {
29 const ARM64InstrInfo *TII;
30 const TargetRegisterInfo *TRI;
31 const MachineRegisterInfo *MRI;
32 MachineFunction *MF;
33 TargetSchedModel SchedModel;
34 MachineTraceMetrics *Traces;
35 MachineTraceMetrics::Ensemble *MinInstr;
36
37public:
38 static char ID;
39 ARM64StorePairSuppress() : MachineFunctionPass(ID) {}
40
Jim Grosbach20b07902014-04-02 18:00:59 +000041 virtual const char *getPassName() const override {
Tim Northover00ed9962014-03-29 10:18:08 +000042 return "ARM64 Store Pair Suppression";
43 }
44
Jim Grosbach20b07902014-04-02 18:00:59 +000045 bool runOnMachineFunction(MachineFunction &F) override;
Tim Northover00ed9962014-03-29 10:18:08 +000046
47private:
48 bool shouldAddSTPToBlock(const MachineBasicBlock *BB);
49
Jim Grosbach20b07902014-04-02 18:00:59 +000050 bool isNarrowFPStore(const MachineInstr &MI);
Tim Northover00ed9962014-03-29 10:18:08 +000051
Jim Grosbach20b07902014-04-02 18:00:59 +000052 virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
Tim Northover00ed9962014-03-29 10:18:08 +000053 AU.setPreservesCFG();
54 AU.addRequired<MachineTraceMetrics>();
55 AU.addPreserved<MachineTraceMetrics>();
56 MachineFunctionPass::getAnalysisUsage(AU);
57 }
58};
59char ARM64StorePairSuppress::ID = 0;
60} // anonymous
61
62FunctionPass *llvm::createARM64StorePairSuppressPass() {
63 return new ARM64StorePairSuppress();
64}
65
66/// Return true if an STP can be added to this block without increasing the
67/// critical resource height. STP is good to form in Ld/St limited blocks and
68/// bad to form in float-point limited blocks. This is true independent of the
69/// critical path. If the critical path is longer than the resource height, the
70/// extra vector ops can limit physreg renaming. Otherwise, it could simply
71/// oversaturate the vector units.
72bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
73 if (!MinInstr)
74 MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
75
76 MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
77 unsigned ResLength = BBTrace.getResourceLength();
78
79 // Get the machine model's scheduling class for STPQi.
80 // Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
81 unsigned SCIdx = TII->get(ARM64::STPDi).getSchedClass();
82 const MCSchedClassDesc *SCDesc =
83 SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
84
85 // If a subtarget does not define resources for STPQi, bail here.
86 if (SCDesc->isValid() && !SCDesc->isVariant()) {
87 unsigned ResLenWithSTP = BBTrace.getResourceLength(
88 ArrayRef<const MachineBasicBlock *>(), SCDesc);
89 if (ResLenWithSTP > ResLength) {
90 DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber()
91 << " resources " << ResLength << " -> " << ResLenWithSTP
92 << "\n");
93 return false;
94 }
95 }
96 return true;
97}
98
99/// Return true if this is a floating-point store smaller than the V reg. On
100/// cyclone, these require a vector shuffle before storing a pair.
101/// Ideally we would call getMatchingPairOpcode() and have the machine model
102/// tell us if it's profitable with no cpu knowledge here.
103///
104/// FIXME: We plan to develop a decent Target abstraction for simple loads and
105/// stores. Until then use a nasty switch similar to ARM64LoadStoreOptimizer.
Jim Grosbach20b07902014-04-02 18:00:59 +0000106bool ARM64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
107 switch (MI.getOpcode()) {
Tim Northover00ed9962014-03-29 10:18:08 +0000108 default:
109 return false;
110 case ARM64::STRSui:
111 case ARM64::STRDui:
112 case ARM64::STURSi:
113 case ARM64::STURDi:
114 return true;
115 }
116}
117
118bool ARM64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
119 MF = &mf;
120 TII = static_cast<const ARM64InstrInfo *>(MF->getTarget().getInstrInfo());
121 TRI = MF->getTarget().getRegisterInfo();
122 MRI = &MF->getRegInfo();
123 const TargetSubtargetInfo &ST =
124 MF->getTarget().getSubtarget<TargetSubtargetInfo>();
125 SchedModel.init(*ST.getSchedModel(), &ST, TII);
126
127 Traces = &getAnalysis<MachineTraceMetrics>();
128 MinInstr = 0;
129
130 DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n');
131
132 if (!SchedModel.hasInstrSchedModel()) {
133 DEBUG(dbgs() << " Skipping pass: no machine model present.\n");
134 return false;
135 }
136
137 // Check for a sequence of stores to the same base address. We don't need to
138 // precisely determine whether a store pair can be formed. But we do want to
139 // filter out most situations where we can't form store pairs to avoid
140 // computing trace metrics in those cases.
Jim Grosbachb8bd4a52014-04-03 23:43:26 +0000141 for (auto &MBB : *MF) {
Tim Northover00ed9962014-03-29 10:18:08 +0000142 bool SuppressSTP = false;
143 unsigned PrevBaseReg = 0;
Jim Grosbachb8bd4a52014-04-03 23:43:26 +0000144 for (auto &MI : MBB) {
Jim Grosbach20b07902014-04-02 18:00:59 +0000145 if (!isNarrowFPStore(MI))
Tim Northover00ed9962014-03-29 10:18:08 +0000146 continue;
147 unsigned BaseReg;
148 unsigned Offset;
Jim Grosbach20b07902014-04-02 18:00:59 +0000149 if (TII->getLdStBaseRegImmOfs(&MI, BaseReg, Offset, TRI)) {
Tim Northover00ed9962014-03-29 10:18:08 +0000150 if (PrevBaseReg == BaseReg) {
151 // If this block can take STPs, skip ahead to the next block.
Jim Grosbach20b07902014-04-02 18:00:59 +0000152 if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent()))
Tim Northover00ed9962014-03-29 10:18:08 +0000153 break;
154 // Otherwise, continue unpairing the stores in this block.
Jim Grosbach20b07902014-04-02 18:00:59 +0000155 DEBUG(dbgs() << "Unpairing store " << MI << "\n");
Tim Northover00ed9962014-03-29 10:18:08 +0000156 SuppressSTP = true;
Jim Grosbach20b07902014-04-02 18:00:59 +0000157 TII->suppressLdStPair(&MI);
Tim Northover00ed9962014-03-29 10:18:08 +0000158 }
159 PrevBaseReg = BaseReg;
160 } else
161 PrevBaseReg = 0;
162 }
163 }
164 // This pass just sets some internal MachineMemOperand flags. It can't really
165 // invalidate anything.
166 return false;
167}