blob: 7f1079e3182c9b8f279af2e62d499b137b972334 [file] [log] [blame]
Clement Courbetac74acd2018-04-04 11:37:06 +00001//===-- Uops.cpp ------------------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "Uops.h"
Clement Courbet0e69e2d2018-05-17 10:52:18 +000011
12#include "Assembler.h"
13#include "BenchmarkRunner.h"
14#include "MCInstrDescView.h"
Clement Courbetac74acd2018-04-04 11:37:06 +000015#include "PerfHelper.h"
Clement Courbet0e69e2d2018-05-17 10:52:18 +000016
17// FIXME: Load constants into registers (e.g. with fld1) to not break
18// instructions like x87.
19
20// Ideally we would like the only limitation on executing uops to be the issue
21// ports. Maximizing port pressure increases the likelihood that the load is
22// distributed evenly across possible ports.
23
24// To achieve that, one approach is to generate instructions that do not have
25// data dependencies between them.
26//
27// For some instructions, this is trivial:
28// mov rax, qword ptr [rsi]
29// mov rax, qword ptr [rsi]
30// mov rax, qword ptr [rsi]
31// mov rax, qword ptr [rsi]
32// For the above snippet, haswell just renames rax four times and executes the
33// four instructions two at a time on P23 and P0126.
34//
35// For some instructions, we just need to make sure that the source is
36// different from the destination. For example, IDIV8r reads from GPR and
37// writes to AX. We just need to ensure that the Var is assigned a
38// register which is different from AX:
39// idiv bx
40// idiv bx
41// idiv bx
42// idiv bx
43// The above snippet will be able to fully saturate the ports, while the same
44// with ax would issue one uop every `latency(IDIV8r)` cycles.
45//
46// Some instructions make this harder because they both read and write from
47// the same register:
48// inc rax
49// inc rax
50// inc rax
51// inc rax
52// This has a data dependency from each instruction to the next, limit the
53// number of instructions that can be issued in parallel.
54// It turns out that this is not a big issue on recent Intel CPUs because they
55// have heuristics to balance port pressure. In the snippet above, subsequent
56// instructions will end up evenly distributed on {P0,P1,P5,P6}, but some CPUs
57// might end up executing them all on P0 (just because they can), or try
58// avoiding P5 because it's usually under high pressure from vector
59// instructions.
60// This issue is even more important for high-latency instructions because
61// they increase the idle time of the CPU, e.g. :
62// imul rax, rbx
63// imul rax, rbx
64// imul rax, rbx
65// imul rax, rbx
66//
67// To avoid that, we do the renaming statically by generating as many
68// independent exclusive assignments as possible (until all possible registers
69// are exhausted) e.g.:
70// imul rax, rbx
71// imul rcx, rbx
72// imul rdx, rbx
73// imul r8, rbx
74//
75// Some instruction even make the above static renaming impossible because
76// they implicitly read and write from the same operand, e.g. ADC16rr reads
77// and writes from EFLAGS.
78// In that case we just use a greedy register assignment and hope for the
79// best.
Clement Courbetac74acd2018-04-04 11:37:06 +000080
81namespace exegesis {
82
Clement Courbet0e69e2d2018-05-17 10:52:18 +000083static bool hasUnknownOperand(const llvm::MCOperandInfo &OpInfo) {
84 return OpInfo.OperandType == llvm::MCOI::OPERAND_UNKNOWN;
85}
86
87// FIXME: Handle memory, see PR36905.
88static bool hasMemoryOperand(const llvm::MCOperandInfo &OpInfo) {
89 return OpInfo.OperandType == llvm::MCOI::OPERAND_MEMORY;
90}
91
92static bool isInfeasible(const Instruction &Instruction, std::string &Error) {
93 const auto &MCInstrDesc = Instruction.Description;
94 if (MCInstrDesc.isPseudo()) {
95 Error = "is pseudo";
Clement Courbetac74acd2018-04-04 11:37:06 +000096 return true;
Clement Courbetac74acd2018-04-04 11:37:06 +000097 }
Clement Courbet0e69e2d2018-05-17 10:52:18 +000098 if (llvm::any_of(MCInstrDesc.operands(), hasUnknownOperand)) {
99 Error = "has unknown operands";
100 return true;
101 }
102 if (llvm::any_of(MCInstrDesc.operands(), hasMemoryOperand)) {
103 Error = "has memory operands";
104 return true;
105 }
106 return false;
107}
108
109// Returns whether this Variable ties Use and Def operands together.
110static bool hasTiedOperands(const Variable *Var) {
111 bool HasUse = false;
112 bool HasDef = false;
113 for (const Operand *Op : Var->TiedOperands) {
114 if (Op->IsDef)
115 HasDef = true;
116 else
117 HasUse = true;
118 }
119 return HasUse && HasDef;
120}
121
122static llvm::SmallVector<Variable *, 8>
123getTiedVariables(const Instruction &Instruction) {
124 llvm::SmallVector<Variable *, 8> Result;
125 for (auto *Var : Instruction.Variables)
126 if (hasTiedOperands(Var))
127 Result.push_back(Var);
128 return Result;
129}
130
131static void remove(llvm::BitVector &a, const llvm::BitVector &b) {
132 assert(a.size() == b.size());
133 for (auto I : b.set_bits())
134 a.reset(I);
Clement Courbetac74acd2018-04-04 11:37:06 +0000135}
136
137static llvm::Error makeError(llvm::Twine Msg) {
138 return llvm::make_error<llvm::StringError>(Msg,
139 llvm::inconvertibleErrorCode());
140}
141
Clement Courbetac74acd2018-04-04 11:37:06 +0000142UopsBenchmarkRunner::~UopsBenchmarkRunner() = default;
143
Clement Courbet62b34fa2018-06-06 09:42:36 +0000144InstructionBenchmark::ModeE UopsBenchmarkRunner::getMode() const {
145 return InstructionBenchmark::Uops;
Clement Courbet2cb97b92018-06-04 11:43:40 +0000146}
Clement Courbetac74acd2018-04-04 11:37:06 +0000147
Guillaume Chatelet7b852cd2018-06-07 08:11:54 +0000148llvm::Expected<BenchmarkConfiguration>
149UopsBenchmarkRunner::createConfiguration(RegisterAliasingTrackerCache &RATC,
150 unsigned Opcode,
151 llvm::raw_ostream &Info) const {
152 BenchmarkConfiguration Configuration;
153 std::vector<llvm::MCInst> &Snippet = Configuration.Snippet;
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000154 const llvm::MCInstrDesc &MCInstrDesc = MCInstrInfo.get(Opcode);
155 const Instruction Instruction(MCInstrDesc, RATC);
156
157 std::string Error;
158 if (isInfeasible(Instruction, Error)) {
159 llvm::report_fatal_error(llvm::Twine("Infeasible : ").concat(Error));
Clement Courbetac74acd2018-04-04 11:37:06 +0000160 }
161
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000162 const AliasingConfigurations SelfAliasing(Instruction, Instruction);
163 if (SelfAliasing.empty()) {
164 Info << "instruction is parallel, repeating a random one.\n";
165 Snippet.push_back(randomizeUnsetVariablesAndBuild(Instruction));
Guillaume Chatelet7b852cd2018-06-07 08:11:54 +0000166 return Configuration;
Clement Courbetac74acd2018-04-04 11:37:06 +0000167 }
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000168 if (SelfAliasing.hasImplicitAliasing()) {
169 Info << "instruction is serial, repeating a random one.\n";
170 Snippet.push_back(randomizeUnsetVariablesAndBuild(Instruction));
Guillaume Chatelet7b852cd2018-06-07 08:11:54 +0000171 return Configuration;
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000172 }
173 const auto TiedVariables = getTiedVariables(Instruction);
174 if (!TiedVariables.empty()) {
175 if (TiedVariables.size() > 1) {
176 Info << "Not yet implemented, don't know how to handle several tied "
177 "variables\n";
178 return makeError("Infeasible : don't know how to handle several tied "
179 "variables");
180 }
181 Info << "instruction has tied variables using static renaming.\n";
182 Variable *Var = TiedVariables.front();
183 assert(Var);
184 assert(!Var->TiedOperands.empty());
185 const Operand &Operand = *Var->TiedOperands.front();
186 assert(Operand.Tracker);
187 for (const llvm::MCPhysReg Reg : Operand.Tracker->sourceBits().set_bits()) {
188 clearVariableAssignments(Instruction);
189 Var->AssignedValue = llvm::MCOperand::createReg(Reg);
190 Snippet.push_back(randomizeUnsetVariablesAndBuild(Instruction));
191 }
Guillaume Chatelet7b852cd2018-06-07 08:11:54 +0000192 return Configuration;
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000193 }
194 // No tied variables, we pick random values for defs.
195 llvm::BitVector Defs(MCRegisterInfo.getNumRegs());
196 for (const auto &Op : Instruction.Operands) {
197 if (Op.Tracker && Op.IsExplicit && Op.IsDef) {
198 assert(Op.Var);
199 auto PossibleRegisters = Op.Tracker->sourceBits();
200 remove(PossibleRegisters, RATC.reservedRegisters());
201 assert(PossibleRegisters.any() && "No register left to choose from");
202 const auto RandomReg = randomBit(PossibleRegisters);
203 Defs.set(RandomReg);
204 Op.Var->AssignedValue = llvm::MCOperand::createReg(RandomReg);
205 }
206 }
207 // And pick random use values that are not reserved and don't alias with defs.
208 const auto DefAliases = getAliasedBits(MCRegisterInfo, Defs);
209 for (const auto &Op : Instruction.Operands) {
210 if (Op.Tracker && Op.IsExplicit && !Op.IsDef) {
211 assert(Op.Var);
212 auto PossibleRegisters = Op.Tracker->sourceBits();
213 remove(PossibleRegisters, RATC.reservedRegisters());
214 remove(PossibleRegisters, DefAliases);
215 assert(PossibleRegisters.any() && "No register left to choose from");
216 const auto RandomReg = randomBit(PossibleRegisters);
217 Op.Var->AssignedValue = llvm::MCOperand::createReg(RandomReg);
218 }
219 }
220 Info
221 << "instruction has no tied variables picking Uses different from defs\n";
222 Snippet.push_back(randomizeUnsetVariablesAndBuild(Instruction));
Guillaume Chatelet7b852cd2018-06-07 08:11:54 +0000223 return Configuration;
Clement Courbetac74acd2018-04-04 11:37:06 +0000224}
225
226std::vector<BenchmarkMeasure>
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000227UopsBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
Clement Courbetac74acd2018-04-04 11:37:06 +0000228 const unsigned NumRepetitions) const {
229 const auto &SchedModel = State.getSubtargetInfo().getSchedModel();
230
231 std::vector<BenchmarkMeasure> Result;
232 for (unsigned ProcResIdx = 1;
233 ProcResIdx < SchedModel.getNumProcResourceKinds(); ++ProcResIdx) {
Clement Courbetb4493792018-04-10 08:16:37 +0000234 const char *const PfmCounters = SchedModel.getExtraProcessorInfo()
235 .PfmCounters.IssueCounters[ProcResIdx];
236 if (!PfmCounters)
Clement Courbetac74acd2018-04-04 11:37:06 +0000237 continue;
Clement Courbetb4493792018-04-10 08:16:37 +0000238 // FIXME: Sum results when there are several counters for a single ProcRes
239 // (e.g. P23 on SandyBridge).
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000240 pfm::PerfEvent UopPerfEvent(PfmCounters);
241 if (!UopPerfEvent.valid())
242 llvm::report_fatal_error(
243 llvm::Twine("invalid perf event ").concat(PfmCounters));
244 pfm::Counter Counter(UopPerfEvent);
Clement Courbetac74acd2018-04-04 11:37:06 +0000245 Counter.start();
246 Function();
247 Counter.stop();
248 Result.push_back({llvm::itostr(ProcResIdx),
249 static_cast<double>(Counter.read()) / NumRepetitions,
Clement Courbetb4493792018-04-10 08:16:37 +0000250 SchedModel.getProcResource(ProcResIdx)->Name});
Clement Courbetac74acd2018-04-04 11:37:06 +0000251 }
252 return Result;
253}
254
255} // namespace exegesis