blob: 349f5f4eaa4bf61fa70b980a62044b515d72a2e2 [file] [log] [blame]
Clement Courbetac74acd2018-04-04 11:37:06 +00001//===-- Uops.cpp ------------------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "Uops.h"
Clement Courbet0e69e2d2018-05-17 10:52:18 +000011
12#include "Assembler.h"
13#include "BenchmarkRunner.h"
14#include "MCInstrDescView.h"
Clement Courbetac74acd2018-04-04 11:37:06 +000015#include "PerfHelper.h"
Clement Courbet0e69e2d2018-05-17 10:52:18 +000016
17// FIXME: Load constants into registers (e.g. with fld1) to not break
18// instructions like x87.
19
20// Ideally we would like the only limitation on executing uops to be the issue
21// ports. Maximizing port pressure increases the likelihood that the load is
22// distributed evenly across possible ports.
23
24// To achieve that, one approach is to generate instructions that do not have
25// data dependencies between them.
26//
27// For some instructions, this is trivial:
28// mov rax, qword ptr [rsi]
29// mov rax, qword ptr [rsi]
30// mov rax, qword ptr [rsi]
31// mov rax, qword ptr [rsi]
32// For the above snippet, haswell just renames rax four times and executes the
33// four instructions two at a time on P23 and P0126.
34//
35// For some instructions, we just need to make sure that the source is
36// different from the destination. For example, IDIV8r reads from GPR and
37// writes to AX. We just need to ensure that the Var is assigned a
38// register which is different from AX:
39// idiv bx
40// idiv bx
41// idiv bx
42// idiv bx
43// The above snippet will be able to fully saturate the ports, while the same
44// with ax would issue one uop every `latency(IDIV8r)` cycles.
45//
46// Some instructions make this harder because they both read and write from
47// the same register:
48// inc rax
49// inc rax
50// inc rax
51// inc rax
52// This has a data dependency from each instruction to the next, limit the
53// number of instructions that can be issued in parallel.
54// It turns out that this is not a big issue on recent Intel CPUs because they
55// have heuristics to balance port pressure. In the snippet above, subsequent
56// instructions will end up evenly distributed on {P0,P1,P5,P6}, but some CPUs
57// might end up executing them all on P0 (just because they can), or try
58// avoiding P5 because it's usually under high pressure from vector
59// instructions.
60// This issue is even more important for high-latency instructions because
61// they increase the idle time of the CPU, e.g. :
62// imul rax, rbx
63// imul rax, rbx
64// imul rax, rbx
65// imul rax, rbx
66//
67// To avoid that, we do the renaming statically by generating as many
68// independent exclusive assignments as possible (until all possible registers
69// are exhausted) e.g.:
70// imul rax, rbx
71// imul rcx, rbx
72// imul rdx, rbx
73// imul r8, rbx
74//
75// Some instruction even make the above static renaming impossible because
76// they implicitly read and write from the same operand, e.g. ADC16rr reads
77// and writes from EFLAGS.
78// In that case we just use a greedy register assignment and hope for the
79// best.
Clement Courbetac74acd2018-04-04 11:37:06 +000080
81namespace exegesis {
82
Clement Courbet0e69e2d2018-05-17 10:52:18 +000083static bool hasUnknownOperand(const llvm::MCOperandInfo &OpInfo) {
84 return OpInfo.OperandType == llvm::MCOI::OPERAND_UNKNOWN;
85}
86
87// FIXME: Handle memory, see PR36905.
88static bool hasMemoryOperand(const llvm::MCOperandInfo &OpInfo) {
89 return OpInfo.OperandType == llvm::MCOI::OPERAND_MEMORY;
90}
91
Guillaume Chateletc9f727b2018-06-13 13:24:41 +000092llvm::Error
93UopsBenchmarkRunner::isInfeasible(const llvm::MCInstrDesc &MCInstrDesc) const {
94 if (MCInstrDesc.isPseudo())
95 return llvm::make_error<BenchmarkFailure>("Infeasible : is pseudo");
96 if (llvm::any_of(MCInstrDesc.operands(), hasUnknownOperand))
97 return llvm::make_error<BenchmarkFailure>(
98 "Infeasible : has unknown operands");
99 if (llvm::any_of(MCInstrDesc.operands(), hasMemoryOperand))
Clement Courbetcff2caa2018-06-25 11:22:23 +0000100 return llvm::make_error<BenchmarkFailure>(
101 "Infeasible : has memory operands");
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000102 return llvm::Error::success();
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000103}
104
105// Returns whether this Variable ties Use and Def operands together.
Guillaume Chateletef6cef52018-06-20 08:52:30 +0000106static bool hasTiedOperands(const Instruction &Instr, const Variable &Var) {
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000107 bool HasUse = false;
108 bool HasDef = false;
Guillaume Chateletef6cef52018-06-20 08:52:30 +0000109 for (const unsigned OpIndex : Var.TiedOperands) {
110 const Operand &Op = Instr.Operands[OpIndex];
111 if (Op.IsDef)
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000112 HasDef = true;
113 else
114 HasUse = true;
115 }
116 return HasUse && HasDef;
117}
118
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000119static llvm::SmallVector<const Variable *, 8>
120getTiedVariables(const Instruction &Instr) {
121 llvm::SmallVector<const Variable *, 8> Result;
122 for (const auto &Var : Instr.Variables)
Guillaume Chateletef6cef52018-06-20 08:52:30 +0000123 if (hasTiedOperands(Instr, Var))
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000124 Result.push_back(&Var);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000125 return Result;
126}
127
128static void remove(llvm::BitVector &a, const llvm::BitVector &b) {
129 assert(a.size() == b.size());
130 for (auto I : b.set_bits())
131 a.reset(I);
Clement Courbetac74acd2018-04-04 11:37:06 +0000132}
133
Clement Courbetac74acd2018-04-04 11:37:06 +0000134UopsBenchmarkRunner::~UopsBenchmarkRunner() = default;
135
Clement Courbet62b34fa2018-06-06 09:42:36 +0000136InstructionBenchmark::ModeE UopsBenchmarkRunner::getMode() const {
137 return InstructionBenchmark::Uops;
Clement Courbet2cb97b92018-06-04 11:43:40 +0000138}
Clement Courbetac74acd2018-04-04 11:37:06 +0000139
Guillaume Chateletef6cef52018-06-20 08:52:30 +0000140llvm::Expected<SnippetPrototype>
141UopsBenchmarkRunner::generatePrototype(unsigned Opcode) const {
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000142 const auto &InstrDesc = MCInstrInfo.get(Opcode);
143 if (auto E = isInfeasible(InstrDesc))
144 return std::move(E);
145 const Instruction Instr(InstrDesc, RATC);
146 const AliasingConfigurations SelfAliasing(Instr, Instr);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000147 if (SelfAliasing.empty()) {
Guillaume Chateletef6cef52018-06-20 08:52:30 +0000148 SnippetPrototype Prototype;
149 Prototype.Explanation = "instruction is parallel, repeating a random one.";
150 Prototype.Snippet.emplace_back(Instr);
Clement Courbet2c409702018-06-20 09:18:32 +0000151 return std::move(Prototype);
Clement Courbetac74acd2018-04-04 11:37:06 +0000152 }
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000153 if (SelfAliasing.hasImplicitAliasing()) {
Guillaume Chateletef6cef52018-06-20 08:52:30 +0000154 SnippetPrototype Prototype;
155 Prototype.Explanation = "instruction is serial, repeating a random one.";
156 Prototype.Snippet.emplace_back(Instr);
Clement Courbet2c409702018-06-20 09:18:32 +0000157 return std::move(Prototype);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000158 }
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000159 const auto TiedVariables = getTiedVariables(Instr);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000160 if (!TiedVariables.empty()) {
Guillaume Chateletb4f15822018-06-07 14:00:29 +0000161 if (TiedVariables.size() > 1)
162 return llvm::make_error<llvm::StringError>(
163 "Infeasible : don't know how to handle several tied variables",
164 llvm::inconvertibleErrorCode());
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000165 const Variable *Var = TiedVariables.front();
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000166 assert(Var);
167 assert(!Var->TiedOperands.empty());
Guillaume Chateletef6cef52018-06-20 08:52:30 +0000168 const Operand &Op = Instr.Operands[Var->TiedOperands.front()];
169 assert(Op.Tracker);
170 SnippetPrototype Prototype;
171 Prototype.Explanation =
172 "instruction has tied variables using static renaming.";
173 for (const llvm::MCPhysReg Reg : Op.Tracker->sourceBits().set_bits()) {
174 Prototype.Snippet.emplace_back(Instr);
175 Prototype.Snippet.back().getValueFor(*Var) =
176 llvm::MCOperand::createReg(Reg);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000177 }
Clement Courbet2c409702018-06-20 09:18:32 +0000178 return std::move(Prototype);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000179 }
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000180 InstructionInstance II(Instr);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000181 // No tied variables, we pick random values for defs.
182 llvm::BitVector Defs(MCRegisterInfo.getNumRegs());
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000183 for (const auto &Op : Instr.Operands) {
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000184 if (Op.Tracker && Op.IsExplicit && Op.IsDef) {
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000185 auto PossibleRegisters = Op.Tracker->sourceBits();
186 remove(PossibleRegisters, RATC.reservedRegisters());
187 assert(PossibleRegisters.any() && "No register left to choose from");
188 const auto RandomReg = randomBit(PossibleRegisters);
189 Defs.set(RandomReg);
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000190 II.getValueFor(Op) = llvm::MCOperand::createReg(RandomReg);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000191 }
192 }
193 // And pick random use values that are not reserved and don't alias with defs.
194 const auto DefAliases = getAliasedBits(MCRegisterInfo, Defs);
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000195 for (const auto &Op : Instr.Operands) {
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000196 if (Op.Tracker && Op.IsExplicit && !Op.IsDef) {
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000197 auto PossibleRegisters = Op.Tracker->sourceBits();
198 remove(PossibleRegisters, RATC.reservedRegisters());
199 remove(PossibleRegisters, DefAliases);
200 assert(PossibleRegisters.any() && "No register left to choose from");
201 const auto RandomReg = randomBit(PossibleRegisters);
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000202 II.getValueFor(Op) = llvm::MCOperand::createReg(RandomReg);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000203 }
204 }
Guillaume Chateletef6cef52018-06-20 08:52:30 +0000205 SnippetPrototype Prototype;
206 Prototype.Explanation =
Guillaume Chateletb4f15822018-06-07 14:00:29 +0000207 "instruction has no tied variables picking Uses different from defs";
Guillaume Chateletef6cef52018-06-20 08:52:30 +0000208 Prototype.Snippet.push_back(std::move(II));
Clement Courbet2c409702018-06-20 09:18:32 +0000209 return std::move(Prototype);
Clement Courbetac74acd2018-04-04 11:37:06 +0000210}
211
212std::vector<BenchmarkMeasure>
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000213UopsBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
Clement Courbetac74acd2018-04-04 11:37:06 +0000214 const unsigned NumRepetitions) const {
215 const auto &SchedModel = State.getSubtargetInfo().getSchedModel();
216
217 std::vector<BenchmarkMeasure> Result;
218 for (unsigned ProcResIdx = 1;
219 ProcResIdx < SchedModel.getNumProcResourceKinds(); ++ProcResIdx) {
Clement Courbetb4493792018-04-10 08:16:37 +0000220 const char *const PfmCounters = SchedModel.getExtraProcessorInfo()
221 .PfmCounters.IssueCounters[ProcResIdx];
222 if (!PfmCounters)
Clement Courbetac74acd2018-04-04 11:37:06 +0000223 continue;
Clement Courbet38275372018-06-12 13:28:37 +0000224 // We sum counts when there are several counters for a single ProcRes
Clement Courbetb4493792018-04-10 08:16:37 +0000225 // (e.g. P23 on SandyBridge).
Clement Courbet38275372018-06-12 13:28:37 +0000226 int64_t CounterValue = 0;
227 llvm::SmallVector<llvm::StringRef, 2> CounterNames;
228 llvm::StringRef(PfmCounters).split(CounterNames, ',');
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000229 for (const auto &CounterName : CounterNames) {
Clement Courbet38275372018-06-12 13:28:37 +0000230 pfm::PerfEvent UopPerfEvent(CounterName);
231 if (!UopPerfEvent.valid())
232 llvm::report_fatal_error(
233 llvm::Twine("invalid perf event ").concat(PfmCounters));
234 pfm::Counter Counter(UopPerfEvent);
235 Counter.start();
236 Function();
237 Counter.stop();
238 CounterValue += Counter.read();
239 }
Clement Courbetac74acd2018-04-04 11:37:06 +0000240 Result.push_back({llvm::itostr(ProcResIdx),
Clement Courbet38275372018-06-12 13:28:37 +0000241 static_cast<double>(CounterValue) / NumRepetitions,
Clement Courbetb4493792018-04-10 08:16:37 +0000242 SchedModel.getProcResource(ProcResIdx)->Name});
Clement Courbetac74acd2018-04-04 11:37:06 +0000243 }
244 return Result;
245}
246
247} // namespace exegesis