blob: 8fc8fd278d41d7f3c7f9e4911e0aaa8fa9b25a16 [file] [log] [blame]
Clement Courbetac74acd2018-04-04 11:37:06 +00001//===-- Uops.cpp ------------------------------------------------*- C++ -*-===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Clement Courbetac74acd2018-04-04 11:37:06 +00006//
7//===----------------------------------------------------------------------===//
8
9#include "Uops.h"
Clement Courbet0e69e2d2018-05-17 10:52:18 +000010
11#include "Assembler.h"
12#include "BenchmarkRunner.h"
13#include "MCInstrDescView.h"
Guillaume Chateletfb943542018-08-01 14:41:45 +000014#include "Target.h"
Clement Courbet0e69e2d2018-05-17 10:52:18 +000015
16// FIXME: Load constants into registers (e.g. with fld1) to not break
17// instructions like x87.
18
19// Ideally we would like the only limitation on executing uops to be the issue
20// ports. Maximizing port pressure increases the likelihood that the load is
21// distributed evenly across possible ports.
22
23// To achieve that, one approach is to generate instructions that do not have
24// data dependencies between them.
25//
26// For some instructions, this is trivial:
27// mov rax, qword ptr [rsi]
28// mov rax, qword ptr [rsi]
29// mov rax, qword ptr [rsi]
30// mov rax, qword ptr [rsi]
31// For the above snippet, haswell just renames rax four times and executes the
32// four instructions two at a time on P23 and P0126.
33//
34// For some instructions, we just need to make sure that the source is
35// different from the destination. For example, IDIV8r reads from GPR and
36// writes to AX. We just need to ensure that the Var is assigned a
37// register which is different from AX:
38// idiv bx
39// idiv bx
40// idiv bx
41// idiv bx
42// The above snippet will be able to fully saturate the ports, while the same
43// with ax would issue one uop every `latency(IDIV8r)` cycles.
44//
45// Some instructions make this harder because they both read and write from
46// the same register:
47// inc rax
48// inc rax
49// inc rax
50// inc rax
51// This has a data dependency from each instruction to the next, limit the
52// number of instructions that can be issued in parallel.
53// It turns out that this is not a big issue on recent Intel CPUs because they
54// have heuristics to balance port pressure. In the snippet above, subsequent
55// instructions will end up evenly distributed on {P0,P1,P5,P6}, but some CPUs
56// might end up executing them all on P0 (just because they can), or try
57// avoiding P5 because it's usually under high pressure from vector
58// instructions.
59// This issue is even more important for high-latency instructions because
60// they increase the idle time of the CPU, e.g. :
61// imul rax, rbx
62// imul rax, rbx
63// imul rax, rbx
64// imul rax, rbx
65//
66// To avoid that, we do the renaming statically by generating as many
67// independent exclusive assignments as possible (until all possible registers
68// are exhausted) e.g.:
69// imul rax, rbx
70// imul rcx, rbx
71// imul rdx, rbx
72// imul r8, rbx
73//
74// Some instruction even make the above static renaming impossible because
75// they implicitly read and write from the same operand, e.g. ADC16rr reads
76// and writes from EFLAGS.
77// In that case we just use a greedy register assignment and hope for the
78// best.
Clement Courbetac74acd2018-04-04 11:37:06 +000079
Fangrui Song32401af2018-10-22 17:10:47 +000080namespace llvm {
Clement Courbetac74acd2018-04-04 11:37:06 +000081namespace exegesis {
82
Guillaume Chateletc9f727b2018-06-13 13:24:41 +000083static llvm::SmallVector<const Variable *, 8>
Guillaume Chatelet09c28392018-10-09 08:59:10 +000084getVariablesWithTiedOperands(const Instruction &Instr) {
Guillaume Chateletc9f727b2018-06-13 13:24:41 +000085 llvm::SmallVector<const Variable *, 8> Result;
86 for (const auto &Var : Instr.Variables)
Guillaume Chatelet09c28392018-10-09 08:59:10 +000087 if (Var.hasTiedOperands())
Guillaume Chateletc9f727b2018-06-13 13:24:41 +000088 Result.push_back(&Var);
Clement Courbet0e69e2d2018-05-17 10:52:18 +000089 return Result;
90}
91
92static void remove(llvm::BitVector &a, const llvm::BitVector &b) {
93 assert(a.size() == b.size());
94 for (auto I : b.set_bits())
95 a.reset(I);
Clement Courbetac74acd2018-04-04 11:37:06 +000096}
97
Clement Courbetac74acd2018-04-04 11:37:06 +000098UopsBenchmarkRunner::~UopsBenchmarkRunner() = default;
Guillaume Chateletffc3ffa2018-10-10 09:45:17 +000099
Clement Courbetd939f6d2018-09-13 07:40:53 +0000100UopsSnippetGenerator::~UopsSnippetGenerator() = default;
Clement Courbetac74acd2018-04-04 11:37:06 +0000101
Clement Courbetd939f6d2018-09-13 07:40:53 +0000102void UopsSnippetGenerator::instantiateMemoryOperands(
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000103 const unsigned ScratchSpacePointerInReg,
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000104 std::vector<InstructionTemplate> &Instructions) const {
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000105 if (ScratchSpacePointerInReg == 0)
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000106 return; // no memory operands.
Guillaume Chateletfb943542018-08-01 14:41:45 +0000107 const auto &ET = State.getExegesisTarget();
108 const unsigned MemStep = ET.getMaxMemoryAccessSize();
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000109 const size_t OriginalInstructionsSize = Instructions.size();
Guillaume Chateletfb943542018-08-01 14:41:45 +0000110 size_t I = 0;
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000111 for (InstructionTemplate &IT : Instructions) {
112 ET.fillMemoryOperands(IT, ScratchSpacePointerInReg, I * MemStep);
Guillaume Chateletfb943542018-08-01 14:41:45 +0000113 ++I;
114 }
115
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000116 while (Instructions.size() < kMinNumDifferentAddresses) {
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000117 InstructionTemplate IT = Instructions[I % OriginalInstructionsSize];
118 ET.fillMemoryOperands(IT, ScratchSpacePointerInReg, I * MemStep);
Guillaume Chateletfb943542018-08-01 14:41:45 +0000119 ++I;
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000120 Instructions.push_back(std::move(IT));
Guillaume Chateletfb943542018-08-01 14:41:45 +0000121 }
Clement Courbetd939f6d2018-09-13 07:40:53 +0000122 assert(I * MemStep < BenchmarkRunner::ScratchSpace::kSize &&
123 "not enough scratch space");
Guillaume Chateletfb943542018-08-01 14:41:45 +0000124}
125
Clement Courbet0ddf81c2019-02-26 10:54:45 +0000126static std::vector<InstructionTemplate> generateSnippetUsingStaticRenaming(
127 const LLVMState &State, const InstructionTemplate &IT,
128 const ArrayRef<const Variable *> TiedVariables,
129 const BitVector *ScratchSpaceAliasedRegs) {
130 std::vector<InstructionTemplate> Instructions;
131 // Assign registers to variables in a round-robin manner. This is simple but
132 // ensures that the most register-constrained variable does not get starved.
133 std::vector<BitVector> PossibleRegsForVar;
134 for (const Variable *Var : TiedVariables) {
135 assert(Var);
136 const Operand &Op = IT.Instr.getPrimaryOperand(*Var);
137 assert(Op.isReg());
138 BitVector PossibleRegs = State.getRATC().emptyRegisters();
139 if (ScratchSpaceAliasedRegs) {
140 PossibleRegs |= *ScratchSpaceAliasedRegs;
141 }
142 PossibleRegs.flip();
143 PossibleRegs &= Op.getRegisterAliasing().sourceBits();
144 PossibleRegsForVar.push_back(std::move(PossibleRegs));
145 }
146 SmallVector<int, 2> Iterators(TiedVariables.size(), 0);
147 while (true) {
148 InstructionTemplate TmpIT = IT;
149 // Find a possible register for each variable in turn, marking the
150 // register as taken.
151 for (size_t VarId = 0; VarId < TiedVariables.size(); ++VarId) {
152 const int NextPossibleReg =
153 PossibleRegsForVar[VarId].find_next(Iterators[VarId]);
154 if (NextPossibleReg <= 0) {
155 return Instructions;
156 }
157 TmpIT.getValueFor(*TiedVariables[VarId]) =
158 llvm::MCOperand::createReg(NextPossibleReg);
159 // Bump iterator.
160 Iterators[VarId] = NextPossibleReg;
161 // Prevent other variables from using the register.
162 for (BitVector &OtherPossibleRegs : PossibleRegsForVar) {
163 OtherPossibleRegs.reset(NextPossibleReg);
164 }
165 }
166 Instructions.push_back(std::move(TmpIT));
167 }
168}
169
Guillaume Chatelet296a8622018-10-15 09:09:19 +0000170llvm::Expected<std::vector<CodeTemplate>>
171UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000172 CodeTemplate CT;
Guillaume Chateletfb943542018-08-01 14:41:45 +0000173 const llvm::BitVector *ScratchSpaceAliasedRegs = nullptr;
174 if (Instr.hasMemoryOperands()) {
Guillaume Chatelet9b592382018-10-10 14:57:32 +0000175 const auto &ET = State.getExegesisTarget();
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000176 CT.ScratchSpacePointerInReg =
Guillaume Chateletfb943542018-08-01 14:41:45 +0000177 ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000178 if (CT.ScratchSpacePointerInReg == 0)
Guillaume Chateletfb943542018-08-01 14:41:45 +0000179 return llvm::make_error<BenchmarkFailure>(
180 "Infeasible : target does not support memory instructions");
181 ScratchSpaceAliasedRegs =
Guillaume Chateletee9c2a172018-10-10 14:22:48 +0000182 &State.getRATC().getRegister(CT.ScratchSpacePointerInReg).aliasedBits();
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000183 // If the instruction implicitly writes to ScratchSpacePointerInReg , abort.
Guillaume Chateletfb943542018-08-01 14:41:45 +0000184 // FIXME: We could make a copy of the scratch register.
185 for (const auto &Op : Instr.Operands) {
Guillaume Chatelet09c28392018-10-09 08:59:10 +0000186 if (Op.isDef() && Op.isImplicitReg() &&
187 ScratchSpaceAliasedRegs->test(Op.getImplicitReg()))
Guillaume Chateletfb943542018-08-01 14:41:45 +0000188 return llvm::make_error<BenchmarkFailure>(
189 "Infeasible : memory instruction uses scratch memory register");
190 }
191 }
192
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000193 const AliasingConfigurations SelfAliasing(Instr, Instr);
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000194 InstructionTemplate IT(Instr);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000195 if (SelfAliasing.empty()) {
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000196 CT.Info = "instruction is parallel, repeating a random one.";
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000197 CT.Instructions.push_back(std::move(IT));
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000198 instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
Guillaume Chateletfcbb6f32018-10-17 11:37:28 +0000199 return getSingleton(std::move(CT));
Clement Courbetac74acd2018-04-04 11:37:06 +0000200 }
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000201 if (SelfAliasing.hasImplicitAliasing()) {
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000202 CT.Info = "instruction is serial, repeating a random one.";
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000203 CT.Instructions.push_back(std::move(IT));
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000204 instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
Guillaume Chateletfcbb6f32018-10-17 11:37:28 +0000205 return getSingleton(std::move(CT));
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000206 }
Guillaume Chatelet09c28392018-10-09 08:59:10 +0000207 const auto TiedVariables = getVariablesWithTiedOperands(Instr);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000208 if (!TiedVariables.empty()) {
Clement Courbet0ddf81c2019-02-26 10:54:45 +0000209 CT.Info = "instruction has tied variables, using static renaming.";
210 CT.Instructions = generateSnippetUsingStaticRenaming(
211 State, IT, TiedVariables, ScratchSpaceAliasedRegs);
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000212 instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
Guillaume Chateletfcbb6f32018-10-17 11:37:28 +0000213 return getSingleton(std::move(CT));
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000214 }
Guillaume Chateletee9c2a172018-10-10 14:22:48 +0000215 const auto &ReservedRegisters = State.getRATC().reservedRegisters();
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000216 // No tied variables, we pick random values for defs.
Clement Courbet0e8bf4e2018-06-25 13:44:27 +0000217 llvm::BitVector Defs(State.getRegInfo().getNumRegs());
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000218 for (const auto &Op : Instr.Operands) {
Guillaume Chatelet09c28392018-10-09 08:59:10 +0000219 if (Op.isReg() && Op.isExplicit() && Op.isDef() && !Op.isMemory()) {
220 auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
Guillaume Chateletee9c2a172018-10-10 14:22:48 +0000221 remove(PossibleRegisters, ReservedRegisters);
Guillaume Chateletfb943542018-08-01 14:41:45 +0000222 // Do not use the scratch memory address register.
223 if (ScratchSpaceAliasedRegs)
224 remove(PossibleRegisters, *ScratchSpaceAliasedRegs);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000225 assert(PossibleRegisters.any() && "No register left to choose from");
226 const auto RandomReg = randomBit(PossibleRegisters);
227 Defs.set(RandomReg);
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000228 IT.getValueFor(Op) = llvm::MCOperand::createReg(RandomReg);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000229 }
230 }
231 // And pick random use values that are not reserved and don't alias with defs.
Clement Courbet0e8bf4e2018-06-25 13:44:27 +0000232 const auto DefAliases = getAliasedBits(State.getRegInfo(), Defs);
Guillaume Chateletc9f727b2018-06-13 13:24:41 +0000233 for (const auto &Op : Instr.Operands) {
Guillaume Chatelet09c28392018-10-09 08:59:10 +0000234 if (Op.isReg() && Op.isExplicit() && Op.isUse() && !Op.isMemory()) {
235 auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
Guillaume Chateletee9c2a172018-10-10 14:22:48 +0000236 remove(PossibleRegisters, ReservedRegisters);
Guillaume Chateletfb943542018-08-01 14:41:45 +0000237 // Do not use the scratch memory address register.
238 if (ScratchSpaceAliasedRegs)
239 remove(PossibleRegisters, *ScratchSpaceAliasedRegs);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000240 remove(PossibleRegisters, DefAliases);
241 assert(PossibleRegisters.any() && "No register left to choose from");
242 const auto RandomReg = randomBit(PossibleRegisters);
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000243 IT.getValueFor(Op) = llvm::MCOperand::createReg(RandomReg);
Clement Courbet0e69e2d2018-05-17 10:52:18 +0000244 }
245 }
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000246 CT.Info =
Guillaume Chateletb4f15822018-06-07 14:00:29 +0000247 "instruction has no tied variables picking Uses different from defs";
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000248 CT.Instructions.push_back(std::move(IT));
Guillaume Chatelete60866a2018-08-03 09:29:38 +0000249 instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
Guillaume Chateletfcbb6f32018-10-17 11:37:28 +0000250 return getSingleton(std::move(CT));
Clement Courbetac74acd2018-04-04 11:37:06 +0000251}
252
Clement Courbetf973c2d2018-10-17 15:04:15 +0000253llvm::Expected<std::vector<BenchmarkMeasure>>
254UopsBenchmarkRunner::runMeasurements(const FunctionExecutor &Executor) const {
Clement Courbet596c56f2018-09-26 11:22:56 +0000255 std::vector<BenchmarkMeasure> Result;
Clement Courbet41c8af32018-10-25 07:44:01 +0000256 const PfmCountersInfo &PCI = State.getPfmCounters();
Clement Courbet596c56f2018-09-26 11:22:56 +0000257 // Uops per port.
Clement Courbet41c8af32018-10-25 07:44:01 +0000258 for (const auto *IssueCounter = PCI.IssueCounters,
259 *IssueCounterEnd = PCI.IssueCounters + PCI.NumIssueCounters;
260 IssueCounter != IssueCounterEnd; ++IssueCounter) {
261 if (!IssueCounter->Counter)
Clement Courbet596c56f2018-09-26 11:22:56 +0000262 continue;
Clement Courbet41c8af32018-10-25 07:44:01 +0000263 auto ExpectedCounterValue = Executor.runAndMeasure(IssueCounter->Counter);
Clement Courbetf973c2d2018-10-17 15:04:15 +0000264 if (!ExpectedCounterValue)
265 return ExpectedCounterValue.takeError();
Clement Courbet41c8af32018-10-25 07:44:01 +0000266 Result.push_back(BenchmarkMeasure::Create(IssueCounter->ProcResName,
267 *ExpectedCounterValue));
Clement Courbet596c56f2018-09-26 11:22:56 +0000268 }
269 // NumMicroOps.
Clement Courbet41c8af32018-10-25 07:44:01 +0000270 if (const char *const UopsCounter = PCI.UopsCounter) {
Clement Courbetf973c2d2018-10-17 15:04:15 +0000271 auto ExpectedCounterValue = Executor.runAndMeasure(UopsCounter);
272 if (!ExpectedCounterValue)
273 return ExpectedCounterValue.takeError();
274 Result.push_back(
275 BenchmarkMeasure::Create("NumMicroOps", *ExpectedCounterValue));
Clement Courbetac74acd2018-04-04 11:37:06 +0000276 }
Clement Courbetf973c2d2018-10-17 15:04:15 +0000277 return std::move(Result);
Clement Courbetac74acd2018-04-04 11:37:06 +0000278}
279
Clement Courbetd939f6d2018-09-13 07:40:53 +0000280constexpr const size_t UopsSnippetGenerator::kMinNumDifferentAddresses;
Guillaume Chateletfb943542018-08-01 14:41:45 +0000281
Clement Courbetac74acd2018-04-04 11:37:06 +0000282} // namespace exegesis
Fangrui Song32401af2018-10-22 17:10:47 +0000283} // namespace llvm