blob: dbecbfe54413882cb0c0324427cfb8823af5f891 [file] [log] [blame]
Clement Courbetac74acd2018-04-04 11:37:06 +00001//===-- Uops.cpp ------------------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "Uops.h"
Clement Courbet0e69e2d2018-05-17 10:52:18 +000011
12#include "Assembler.h"
13#include "BenchmarkRunner.h"
14#include "MCInstrDescView.h"
Clement Courbetac74acd2018-04-04 11:37:06 +000015#include "PerfHelper.h"
Guillaume Chateletfb943542018-08-01 14:41:45 +000016#include "Target.h"
Clement Courbet0e69e2d2018-05-17 10:52:18 +000017
18// FIXME: Load constants into registers (e.g. with fld1) to not break
19// instructions like x87.
20
21// Ideally we would like the only limitation on executing uops to be the issue
22// ports. Maximizing port pressure increases the likelihood that the load is
23// distributed evenly across possible ports.
24
25// To achieve that, one approach is to generate instructions that do not have
26// data dependencies between them.
27//
28// For some instructions, this is trivial:
29// mov rax, qword ptr [rsi]
30// mov rax, qword ptr [rsi]
31// mov rax, qword ptr [rsi]
32// mov rax, qword ptr [rsi]
33// For the above snippet, haswell just renames rax four times and executes the
34// four instructions two at a time on P23 and P0126.
35//
36// For some instructions, we just need to make sure that the source is
37// different from the destination. For example, IDIV8r reads from GPR and
38// writes to AX. We just need to ensure that the Var is assigned a
39// register which is different from AX:
40// idiv bx
41// idiv bx
42// idiv bx
43// idiv bx
44// The above snippet will be able to fully saturate the ports, while the same
45// with ax would issue one uop every `latency(IDIV8r)` cycles.
46//
47// Some instructions make this harder because they both read and write from
48// the same register:
49// inc rax
50// inc rax
51// inc rax
52// inc rax
53// This has a data dependency from each instruction to the next, limit the
54// number of instructions that can be issued in parallel.
55// It turns out that this is not a big issue on recent Intel CPUs because they
56// have heuristics to balance port pressure. In the snippet above, subsequent
57// instructions will end up evenly distributed on {P0,P1,P5,P6}, but some CPUs
58// might end up executing them all on P0 (just because they can), or try
59// avoiding P5 because it's usually under high pressure from vector
60// instructions.
61// This issue is even more important for high-latency instructions because
62// they increase the idle time of the CPU, e.g. :
63// imul rax, rbx
64// imul rax, rbx
65// imul rax, rbx
66// imul rax, rbx
67//
68// To avoid that, we do the renaming statically by generating as many
69// independent exclusive assignments as possible (until all possible registers
70// are exhausted) e.g.:
71// imul rax, rbx
72// imul rcx, rbx
73// imul rdx, rbx
74// imul r8, rbx
75//
76// Some instruction even make the above static renaming impossible because
77// they implicitly read and write from the same operand, e.g. ADC16rr reads
78// and writes from EFLAGS.
79// In that case we just use a greedy register assignment and hope for the
80// best.
Clement Courbetac74acd2018-04-04 11:37:06 +000081
82namespace exegesis {
83
Clement Courbet0e69e2d2018-05-17 10:52:18 +000084static bool hasUnknownOperand(const llvm::MCOperandInfo &OpInfo) {
85 return OpInfo.OperandType == llvm::MCOI::OPERAND_UNKNOWN;
86}
87
Guillaume Chateletc9f727b2018-06-13 13:24:41 +000088llvm::Error
Clement Courbetd939f6d2018-09-13 07:40:53 +000089UopsSnippetGenerator::isInfeasible(const llvm::MCInstrDesc &MCInstrDesc) const {
Guillaume Chateletc9f727b2018-06-13 13:24:41 +000090 if (llvm::any_of(MCInstrDesc.operands(), hasUnknownOperand))
91 return llvm::make_error<BenchmarkFailure>(
92 "Infeasible : has unknown operands");
Guillaume Chateletc9f727b2018-06-13 13:24:41 +000093 return llvm::Error::success();
Clement Courbet0e69e2d2018-05-17 10:52:18 +000094}
95
96// Returns whether this Variable ties Use and Def operands together.
Guillaume Chateletef6cef52018-06-20 08:52:30 +000097static bool hasTiedOperands(const Instruction &Instr, const Variable &Var) {
Clement Courbet0e69e2d2018-05-17 10:52:18 +000098 bool HasUse = false;
99 bool HasDef = false;
Guillaume Chateletef6cef52018-06-20 08:52:30 +0000100 for (const unsigned OpIndex : Var.TiedOperands) {
101 const Operand &Op = Instr.Operands[OpIndex];
102 if (Op.IsDef)
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000103 HasDef = true;
104 else
105 HasUse = true;
106 }
107 return HasUse && HasDef;
108}
109
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000110static llvm::SmallVector<const Variable *, 8>
111getTiedVariables(const Instruction &Instr) {
112 llvm::SmallVector<const Variable *, 8> Result;
113 for (const auto &Var : Instr.Variables)
Guillaume Chateletef6cef52018-06-20 08:52:30 +0000114 if (hasTiedOperands(Instr, Var))
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000115 Result.push_back(&Var);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000116 return Result;
117}
118
119static void remove(llvm::BitVector &a, const llvm::BitVector &b) {
120 assert(a.size() == b.size());
121 for (auto I : b.set_bits())
122 a.reset(I);
Clement Courbetac74acd2018-04-04 11:37:06 +0000123}
124
Clement Courbetac74acd2018-04-04 11:37:06 +0000125UopsBenchmarkRunner::~UopsBenchmarkRunner() = default;
Clement Courbetd939f6d2018-09-13 07:40:53 +0000126UopsSnippetGenerator::~UopsSnippetGenerator() = default;
Clement Courbetac74acd2018-04-04 11:37:06 +0000127
Clement Courbetd939f6d2018-09-13 07:40:53 +0000128void UopsSnippetGenerator::instantiateMemoryOperands(
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000129 const unsigned ScratchSpacePointerInReg,
130 std::vector<InstructionBuilder> &Instructions) const {
131 if (ScratchSpacePointerInReg == 0)
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000132 return; // no memory operands.
Guillaume Chateletfb943542018-08-01 14:41:45 +0000133 const auto &ET = State.getExegesisTarget();
134 const unsigned MemStep = ET.getMaxMemoryAccessSize();
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000135 const size_t OriginalInstructionsSize = Instructions.size();
Guillaume Chateletfb943542018-08-01 14:41:45 +0000136 size_t I = 0;
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000137 for (InstructionBuilder &IB : Instructions) {
138 ET.fillMemoryOperands(IB, ScratchSpacePointerInReg, I * MemStep);
Guillaume Chateletfb943542018-08-01 14:41:45 +0000139 ++I;
140 }
141
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000142 while (Instructions.size() < kMinNumDifferentAddresses) {
143 InstructionBuilder IB = Instructions[I % OriginalInstructionsSize];
144 ET.fillMemoryOperands(IB, ScratchSpacePointerInReg, I * MemStep);
Guillaume Chateletfb943542018-08-01 14:41:45 +0000145 ++I;
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000146 Instructions.push_back(std::move(IB));
Guillaume Chateletfb943542018-08-01 14:41:45 +0000147 }
Clement Courbetd939f6d2018-09-13 07:40:53 +0000148 assert(I * MemStep < BenchmarkRunner::ScratchSpace::kSize &&
149 "not enough scratch space");
Guillaume Chateletfb943542018-08-01 14:41:45 +0000150}
151
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000152llvm::Expected<CodeTemplate>
Clement Courbetd939f6d2018-09-13 07:40:53 +0000153UopsSnippetGenerator::generateCodeTemplate(unsigned Opcode) const {
Clement Courbet0e8bf4e2018-06-25 13:44:27 +0000154 const auto &InstrDesc = State.getInstrInfo().get(Opcode);
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000155 if (auto E = isInfeasible(InstrDesc))
156 return std::move(E);
157 const Instruction Instr(InstrDesc, RATC);
Guillaume Chateletfb943542018-08-01 14:41:45 +0000158 const auto &ET = State.getExegesisTarget();
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000159 CodeTemplate CT;
Guillaume Chateletfb943542018-08-01 14:41:45 +0000160
161 const llvm::BitVector *ScratchSpaceAliasedRegs = nullptr;
162 if (Instr.hasMemoryOperands()) {
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000163 CT.ScratchSpacePointerInReg =
Guillaume Chateletfb943542018-08-01 14:41:45 +0000164 ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000165 if (CT.ScratchSpacePointerInReg == 0)
Guillaume Chateletfb943542018-08-01 14:41:45 +0000166 return llvm::make_error<BenchmarkFailure>(
167 "Infeasible : target does not support memory instructions");
168 ScratchSpaceAliasedRegs =
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000169 &RATC.getRegister(CT.ScratchSpacePointerInReg).aliasedBits();
170 // If the instruction implicitly writes to ScratchSpacePointerInReg , abort.
Guillaume Chateletfb943542018-08-01 14:41:45 +0000171 // FIXME: We could make a copy of the scratch register.
172 for (const auto &Op : Instr.Operands) {
173 if (Op.IsDef && Op.ImplicitReg &&
174 ScratchSpaceAliasedRegs->test(*Op.ImplicitReg))
175 return llvm::make_error<BenchmarkFailure>(
176 "Infeasible : memory instruction uses scratch memory register");
177 }
178 }
179
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000180 const AliasingConfigurations SelfAliasing(Instr, Instr);
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000181 InstructionBuilder IB(Instr);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000182 if (SelfAliasing.empty()) {
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000183 CT.Info = "instruction is parallel, repeating a random one.";
184 CT.Instructions.push_back(std::move(IB));
185 instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
186 return std::move(CT);
Clement Courbetac74acd2018-04-04 11:37:06 +0000187 }
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000188 if (SelfAliasing.hasImplicitAliasing()) {
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000189 CT.Info = "instruction is serial, repeating a random one.";
190 CT.Instructions.push_back(std::move(IB));
191 instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
192 return std::move(CT);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000193 }
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000194 const auto TiedVariables = getTiedVariables(Instr);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000195 if (!TiedVariables.empty()) {
Guillaume Chateletb4f15822018-06-07 14:00:29 +0000196 if (TiedVariables.size() > 1)
197 return llvm::make_error<llvm::StringError>(
198 "Infeasible : don't know how to handle several tied variables",
199 llvm::inconvertibleErrorCode());
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000200 const Variable *Var = TiedVariables.front();
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000201 assert(Var);
202 assert(!Var->TiedOperands.empty());
Guillaume Chateletef6cef52018-06-20 08:52:30 +0000203 const Operand &Op = Instr.Operands[Var->TiedOperands.front()];
204 assert(Op.Tracker);
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000205 CT.Info = "instruction has tied variables using static renaming.";
Guillaume Chateletef6cef52018-06-20 08:52:30 +0000206 for (const llvm::MCPhysReg Reg : Op.Tracker->sourceBits().set_bits()) {
Guillaume Chateletfb943542018-08-01 14:41:45 +0000207 if (ScratchSpaceAliasedRegs && ScratchSpaceAliasedRegs->test(Reg))
208 continue; // Do not use the scratch memory address register.
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000209 InstructionBuilder TmpIB = IB;
210 TmpIB.getValueFor(*Var) = llvm::MCOperand::createReg(Reg);
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000211 CT.Instructions.push_back(std::move(TmpIB));
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000212 }
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000213 instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
214 return std::move(CT);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000215 }
216 // No tied variables, we pick random values for defs.
Clement Courbet0e8bf4e2018-06-25 13:44:27 +0000217 llvm::BitVector Defs(State.getRegInfo().getNumRegs());
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000218 for (const auto &Op : Instr.Operands) {
Guillaume Chateletfb943542018-08-01 14:41:45 +0000219 if (Op.Tracker && Op.IsExplicit && Op.IsDef && !Op.IsMem) {
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000220 auto PossibleRegisters = Op.Tracker->sourceBits();
221 remove(PossibleRegisters, RATC.reservedRegisters());
Guillaume Chateletfb943542018-08-01 14:41:45 +0000222 // Do not use the scratch memory address register.
223 if (ScratchSpaceAliasedRegs)
224 remove(PossibleRegisters, *ScratchSpaceAliasedRegs);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000225 assert(PossibleRegisters.any() && "No register left to choose from");
226 const auto RandomReg = randomBit(PossibleRegisters);
227 Defs.set(RandomReg);
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000228 IB.getValueFor(Op) = llvm::MCOperand::createReg(RandomReg);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000229 }
230 }
231 // And pick random use values that are not reserved and don't alias with defs.
Clement Courbet0e8bf4e2018-06-25 13:44:27 +0000232 const auto DefAliases = getAliasedBits(State.getRegInfo(), Defs);
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000233 for (const auto &Op : Instr.Operands) {
Guillaume Chateletfb943542018-08-01 14:41:45 +0000234 if (Op.Tracker && Op.IsExplicit && !Op.IsDef && !Op.IsMem) {
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000235 auto PossibleRegisters = Op.Tracker->sourceBits();
236 remove(PossibleRegisters, RATC.reservedRegisters());
Guillaume Chateletfb943542018-08-01 14:41:45 +0000237 // Do not use the scratch memory address register.
238 if (ScratchSpaceAliasedRegs)
239 remove(PossibleRegisters, *ScratchSpaceAliasedRegs);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000240 remove(PossibleRegisters, DefAliases);
241 assert(PossibleRegisters.any() && "No register left to choose from");
242 const auto RandomReg = randomBit(PossibleRegisters);
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000243 IB.getValueFor(Op) = llvm::MCOperand::createReg(RandomReg);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000244 }
245 }
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000246 CT.Info =
Guillaume Chateletb4f15822018-06-07 14:00:29 +0000247 "instruction has no tied variables picking Uses different from defs";
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000248 CT.Instructions.push_back(std::move(IB));
249 instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
250 return std::move(CT);
Clement Courbetac74acd2018-04-04 11:37:06 +0000251}
252
253std::vector<BenchmarkMeasure>
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000254UopsBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
Clement Courbet684a5f62018-09-26 08:37:21 +0000255 ScratchSpace &Scratch) const {
Clement Courbetac74acd2018-04-04 11:37:06 +0000256 const auto &SchedModel = State.getSubtargetInfo().getSchedModel();
257
258 std::vector<BenchmarkMeasure> Result;
259 for (unsigned ProcResIdx = 1;
260 ProcResIdx < SchedModel.getNumProcResourceKinds(); ++ProcResIdx) {
Clement Courbetb4493792018-04-10 08:16:37 +0000261 const char *const PfmCounters = SchedModel.getExtraProcessorInfo()
262 .PfmCounters.IssueCounters[ProcResIdx];
263 if (!PfmCounters)
Clement Courbetac74acd2018-04-04 11:37:06 +0000264 continue;
Clement Courbet38275372018-06-12 13:28:37 +0000265 // We sum counts when there are several counters for a single ProcRes
Clement Courbetb4493792018-04-10 08:16:37 +0000266 // (e.g. P23 on SandyBridge).
Clement Courbet38275372018-06-12 13:28:37 +0000267 int64_t CounterValue = 0;
268 llvm::SmallVector<llvm::StringRef, 2> CounterNames;
269 llvm::StringRef(PfmCounters).split(CounterNames, ',');
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000270 for (const auto &CounterName : CounterNames) {
Clement Courbet38275372018-06-12 13:28:37 +0000271 pfm::PerfEvent UopPerfEvent(CounterName);
272 if (!UopPerfEvent.valid())
273 llvm::report_fatal_error(
274 llvm::Twine("invalid perf event ").concat(PfmCounters));
275 pfm::Counter Counter(UopPerfEvent);
Guillaume Chateletfb943542018-08-01 14:41:45 +0000276 Scratch.clear();
Clement Courbet38275372018-06-12 13:28:37 +0000277 Counter.start();
Guillaume Chateletfb943542018-08-01 14:41:45 +0000278 Function(Scratch.ptr());
Clement Courbet38275372018-06-12 13:28:37 +0000279 Counter.stop();
280 CounterValue += Counter.read();
281 }
Clement Courbetac74acd2018-04-04 11:37:06 +0000282 Result.push_back({llvm::itostr(ProcResIdx),
Clement Courbet684a5f62018-09-26 08:37:21 +0000283 static_cast<double>(CounterValue),
284 static_cast<double>(CounterValue),
Clement Courbetb4493792018-04-10 08:16:37 +0000285 SchedModel.getProcResource(ProcResIdx)->Name});
Clement Courbetac74acd2018-04-04 11:37:06 +0000286 }
287 return Result;
288}
289
Clement Courbetd939f6d2018-09-13 07:40:53 +0000290constexpr const size_t UopsSnippetGenerator::kMinNumDifferentAddresses;
Guillaume Chateletfb943542018-08-01 14:41:45 +0000291
Clement Courbetac74acd2018-04-04 11:37:06 +0000292} // namespace exegesis