//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//
14
15#include "InstrBuilder.h"
16#include "llvm/MC/MCInst.h"
17#include "llvm/Support/Debug.h"
18#include "llvm/Support/raw_ostream.h"
19
20#define DEBUG_TYPE "llvm-mca"
21
22namespace mca {
23
24using namespace llvm;
25
Andrea Di Biagio94fafdf2018-03-24 16:05:36 +000026static void initializeUsedResources(InstrDesc &ID,
27 const MCSchedClassDesc &SCDesc,
28 const MCSubtargetInfo &STI,
29 ArrayRef<uint64_t> ProcResourceMasks) {
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +000030 const MCSchedModel &SM = STI.getSchedModel();
31
32 // Populate resources consumed.
33 using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
34 std::vector<ResourcePlusCycles> Worklist;
35 for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
36 const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
37 const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
38 uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
39 if (PR.BufferSize != -1)
40 ID.Buffers.push_back(Mask);
41 CycleSegment RCy(0, PRE->Cycles, false);
42 Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
43 }
44
45 // Sort elements by mask popcount, so that we prioritize resource units over
46 // resource groups, and smaller groups over larger groups.
Mandeep Singh Grang8db564e2018-04-01 21:24:53 +000047 llvm::sort(Worklist.begin(), Worklist.end(),
48 [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
49 unsigned popcntA = countPopulation(A.first);
50 unsigned popcntB = countPopulation(B.first);
51 if (popcntA < popcntB)
52 return true;
53 if (popcntA > popcntB)
54 return false;
55 return A.first < B.first;
56 });
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +000057
58 uint64_t UsedResourceUnits = 0;
59
60 // Remove cycles contributed by smaller resources.
61 for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
62 ResourcePlusCycles &A = Worklist[I];
63 if (!A.second.size()) {
64 A.second.NumUnits = 0;
65 A.second.setReserved();
66 ID.Resources.emplace_back(A);
67 continue;
68 }
69
70 ID.Resources.emplace_back(A);
71 uint64_t NormalizedMask = A.first;
72 if (countPopulation(A.first) == 1) {
73 UsedResourceUnits |= A.first;
74 } else {
75 // Remove the leading 1 from the resource group mask.
76 NormalizedMask ^= PowerOf2Floor(NormalizedMask);
77 }
78
79 for (unsigned J = I + 1; J < E; ++J) {
80 ResourcePlusCycles &B = Worklist[J];
81 if ((NormalizedMask & B.first) == NormalizedMask) {
82 B.second.CS.Subtract(A.second.size());
83 if (countPopulation(B.first) > 1)
84 B.second.NumUnits++;
85 }
86 }
87 }
88
89 // A SchedWrite may specify a number of cycles in which a resource group
90 // is reserved. For example (on target x86; cpu Haswell):
91 //
92 // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
93 // let ResourceCycles = [2, 2, 3];
94 // }
95 //
96 // This means:
97 // Resource units HWPort0 and HWPort1 are both used for 2cy.
98 // Resource group HWPort01 is the union of HWPort0 and HWPort1.
99 // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
100 // will not be usable for 2 entire cycles from instruction issue.
101 //
102 // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
103 // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
104 // extra delay on top of the 2 cycles latency.
105 // During those extra cycles, HWPort01 is not usable by other instructions.
106 for (ResourcePlusCycles &RPC : ID.Resources) {
107 if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
108 // Remove the leading 1 from the resource group mask.
109 uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
110 if ((Mask & UsedResourceUnits) == Mask)
111 RPC.second.setReserved();
112 }
113 }
114
Andrea Di Biagio7b3d1622018-03-20 12:58:34 +0000115 DEBUG({
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000116 for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
117 dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n';
118 for (const uint64_t R : ID.Buffers)
119 dbgs() << "\t\tBuffer Mask=" << R << '\n';
Andrea Di Biagio7b3d1622018-03-20 12:58:34 +0000120 });
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000121}
122
123static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
124 const MCSchedClassDesc &SCDesc,
125 const MCSubtargetInfo &STI) {
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000126 if (MCDesc.isCall()) {
127 // We cannot estimate how long this call will take.
128 // Artificially set an arbitrarily high latency (100cy).
Andrea Di Biagioc95a1302018-03-13 15:59:59 +0000129 ID.MaxLatency = 100U;
130 return;
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000131 }
132
Andrea Di Biagioc95a1302018-03-13 15:59:59 +0000133 int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
134 // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
135 ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000136}
137
138static void populateWrites(InstrDesc &ID, const MCInst &MCI,
139 const MCInstrDesc &MCDesc,
140 const MCSchedClassDesc &SCDesc,
141 const MCSubtargetInfo &STI) {
142 computeMaxLatency(ID, MCDesc, SCDesc, STI);
143
144 // Set if writes through this opcode may update super registers.
145 // TODO: on x86-64, a 4 byte write of a general purpose register always
146 // fully updates the super-register.
147 // More in general, (at least on x86) not all register writes perform
148 // a partial (super-)register update.
149 // For example, an AVX instruction that writes on a XMM register implicitly
150 // zeroes the upper half of every aliasing super-register.
151 //
152 // For now, we pessimistically assume that writes are all potentially
153 // partial register updates. This is a good default for most targets, execept
154 // for those like x86 which implement a special semantic for certain opcodes.
155 // At least on x86, this may lead to an inaccurate prediction of the
156 // instruction level parallelism.
157 bool FullyUpdatesSuperRegisters = false;
158
159 // Now Populate Writes.
160
161 // This algorithm currently works under the strong (and potentially incorrect)
162 // assumption that information related to register def/uses can be obtained
163 // from MCInstrDesc.
164 //
165 // However class MCInstrDesc is used to describe MachineInstr objects and not
166 // MCInst objects. To be more specific, MCInstrDesc objects are opcode
167 // descriptors that are automatically generated via tablegen based on the
168 // instruction set information available from the target .td files. That
169 // means, the number of (explicit) definitions according to MCInstrDesc always
170 // matches the cardinality of the `(outs)` set in tablegen.
171 //
172 // By constructions, definitions must appear first in the operand sequence of
173 // a MachineInstr. Also, the (outs) sequence is preserved (example: the first
174 // element in the outs set is the first operand in the corresponding
175 // MachineInstr). That's the reason why MCInstrDesc only needs to declare the
176 // total number of register definitions, and not where those definitions are
177 // in the machine operand sequence.
178 //
179 // Unfortunately, it is not safe to use the information from MCInstrDesc to
180 // also describe MCInst objects. An MCInst object can be obtained from a
181 // MachineInstr through a lowering step which may restructure the operand
182 // sequence (and even remove or introduce new operands). So, there is a high
183 // risk that the lowering step breaks the assumptions that register
184 // definitions are always at the beginning of the machine operand sequence.
185 //
186 // This is a fundamental problem, and it is still an open problem. Essentially
187 // we have to find a way to correlate def/use operands of a MachineInstr to
188 // operands of an MCInst. Otherwise, we cannot correctly reconstruct data
189 // dependencies, nor we can correctly interpret the scheduling model, which
190 // heavily uses machine operand indices to define processor read-advance
191 // information, and to identify processor write resources. Essentially, we
192 // either need something like a MCInstrDesc, but for MCInst, or a way
193 // to map MCInst operands back to MachineInstr operands.
194 //
195 // Unfortunately, we don't have that information now. So, this prototype
196 // currently work under the strong assumption that we can always safely trust
197 // the content of an MCInstrDesc. For example, we can query a MCInstrDesc to
198 // obtain the number of explicit and implicit register defintions. We also
199 // assume that register definitions always come first in the operand sequence.
200 // This last assumption usually makes sense for MachineInstr, where register
201 // definitions always appear at the beginning of the operands sequence. In
202 // reality, these assumptions could be broken by the lowering step, which can
203 // decide to lay out operands in a different order than the original order of
204 // operand as specified by the MachineInstr.
205 //
206 // Things get even more complicated in the presence of "optional" register
207 // definitions. For MachineInstr, optional register definitions are always at
208 // the end of the operand sequence. Some ARM instructions that may update the
209 // status flags specify that register as a optional operand. Since we don't
210 // have operand descriptors for MCInst, we assume for now that the optional
211 // definition is always the last operand of a MCInst. Again, this assumption
212 // may be okay for most targets. However, there is no guarantee that targets
213 // would respect that.
214 //
215 // In conclusion: these are for now the strong assumptions made by the tool:
216 // * The number of explicit and implicit register definitions in a MCInst
217 // matches the number of explicit and implicit definitions according to
218 // the opcode descriptor (MCInstrDesc).
219 // * Register definitions take precedence over register uses in the operands
220 // list.
221 // * If an opcode specifies an optional definition, then the optional
222 // definition is always the last operand in the sequence, and it can be
223 // set to zero (i.e. "no register").
224 //
225 // These assumptions work quite well for most out-of-order in-tree targets
226 // like x86. This is mainly because the vast majority of instructions is
227 // expanded to MCInst using a straightforward lowering logic that preserves
228 // the ordering of the operands.
229 //
230 // In the longer term, we need to find a proper solution for this issue.
231 unsigned NumExplicitDefs = MCDesc.getNumDefs();
232 unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
233 unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
234 unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
235 if (MCDesc.hasOptionalDef())
236 TotalDefs++;
237 ID.Writes.resize(TotalDefs);
238 // Iterate over the operands list, and skip non-register operands.
239 // The first NumExplictDefs register operands are expected to be register
240 // definitions.
241 unsigned CurrentDef = 0;
242 unsigned i = 0;
243 for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
244 const MCOperand &Op = MCI.getOperand(i);
245 if (!Op.isReg())
246 continue;
247
248 WriteDescriptor &Write = ID.Writes[CurrentDef];
249 Write.OpIndex = i;
250 if (CurrentDef < NumWriteLatencyEntries) {
251 const MCWriteLatencyEntry &WLE =
252 *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
253 // Conservatively default to MaxLatency.
254 Write.Latency = WLE.Cycles == -1 ? ID.MaxLatency : WLE.Cycles;
255 Write.SClassOrWriteResourceID = WLE.WriteResourceID;
256 } else {
257 // Assign a default latency for this write.
258 Write.Latency = ID.MaxLatency;
259 Write.SClassOrWriteResourceID = 0;
260 }
261 Write.FullyUpdatesSuperRegs = FullyUpdatesSuperRegisters;
262 Write.IsOptionalDef = false;
Andrea Di Biagio7b3d1622018-03-20 12:58:34 +0000263 DEBUG({
264 dbgs() << "\t\tOpIdx=" << Write.OpIndex << ", Latency=" << Write.Latency
265 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
266 });
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000267 CurrentDef++;
268 }
269
270 if (CurrentDef != NumExplicitDefs)
271 llvm::report_fatal_error(
272 "error: Expected more register operand definitions. ");
273
274 CurrentDef = 0;
275 for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
276 unsigned Index = NumExplicitDefs + CurrentDef;
277 WriteDescriptor &Write = ID.Writes[Index];
278 Write.OpIndex = -1;
279 Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
280 Write.Latency = ID.MaxLatency;
281 Write.SClassOrWriteResourceID = 0;
282 Write.IsOptionalDef = false;
283 assert(Write.RegisterID != 0 && "Expected a valid phys register!");
284 DEBUG(dbgs() << "\t\tOpIdx=" << Write.OpIndex << ", PhysReg="
285 << Write.RegisterID << ", Latency=" << Write.Latency
286 << ", WriteResourceID=" << Write.SClassOrWriteResourceID
287 << '\n');
288 }
289
290 if (MCDesc.hasOptionalDef()) {
291 // Always assume that the optional definition is the last operand of the
292 // MCInst sequence.
293 const MCOperand &Op = MCI.getOperand(MCI.getNumOperands() - 1);
294 if (i == MCI.getNumOperands() || !Op.isReg())
295 llvm::report_fatal_error(
296 "error: expected a register operand for an optional "
297 "definition. Instruction has not be correctly analyzed.\n",
298 false);
299
300 WriteDescriptor &Write = ID.Writes[TotalDefs - 1];
301 Write.OpIndex = MCI.getNumOperands() - 1;
302 // Assign a default latency for this write.
303 Write.Latency = ID.MaxLatency;
304 Write.SClassOrWriteResourceID = 0;
305 Write.IsOptionalDef = true;
306 }
307}
308
309static void populateReads(InstrDesc &ID, const MCInst &MCI,
310 const MCInstrDesc &MCDesc,
311 const MCSchedClassDesc &SCDesc,
312 const MCSubtargetInfo &STI) {
313 unsigned SchedClassID = MCDesc.getSchedClass();
314 bool HasReadAdvanceEntries = SCDesc.NumReadAdvanceEntries > 0;
315
316 unsigned i = 0;
317 unsigned NumExplicitDefs = MCDesc.getNumDefs();
318 // Skip explicit definitions.
319 for (; i < MCI.getNumOperands() && NumExplicitDefs; ++i) {
320 const MCOperand &Op = MCI.getOperand(i);
321 if (Op.isReg())
322 NumExplicitDefs--;
323 }
324
325 if (NumExplicitDefs)
326 llvm::report_fatal_error(
327 "error: Expected more register operand definitions. ", false);
328
329 unsigned NumExplicitUses = MCI.getNumOperands() - i;
330 unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
331 if (MCDesc.hasOptionalDef()) {
332 assert(NumExplicitUses);
333 NumExplicitUses--;
334 }
335 unsigned TotalUses = NumExplicitUses + NumImplicitUses;
336 if (!TotalUses)
337 return;
338
339 ID.Reads.resize(TotalUses);
340 for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) {
341 ReadDescriptor &Read = ID.Reads[CurrentUse];
342 Read.OpIndex = i + CurrentUse;
Andrea Di Biagio0a837ef2018-03-29 14:26:56 +0000343 Read.UseIndex = CurrentUse;
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000344 Read.HasReadAdvanceEntries = HasReadAdvanceEntries;
345 Read.SchedClassID = SchedClassID;
346 DEBUG(dbgs() << "\t\tOpIdx=" << Read.OpIndex);
347 }
348
349 for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) {
350 ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse];
351 Read.OpIndex = -1;
Andrea Di Biagio0a837ef2018-03-29 14:26:56 +0000352 Read.UseIndex = -1;
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000353 Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse];
354 Read.HasReadAdvanceEntries = false;
355 Read.SchedClassID = SchedClassID;
356 DEBUG(dbgs() << "\t\tOpIdx=" << Read.OpIndex
357 << ", RegisterID=" << Read.RegisterID << '\n');
358 }
359}
360
Andrea Di Biagio4704f032018-03-20 12:25:54 +0000361void InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000362 assert(STI.getSchedModel().hasInstrSchedModel() &&
363 "Itineraries are not yet supported!");
364
365 unsigned short Opcode = MCI.getOpcode();
366 // Obtain the instruction descriptor from the opcode.
367 const MCInstrDesc &MCDesc = MCII.get(Opcode);
368 const MCSchedModel &SM = STI.getSchedModel();
369
370 // Then obtain the scheduling class information from the instruction.
371 const MCSchedClassDesc &SCDesc =
372 *SM.getSchedClassDesc(MCDesc.getSchedClass());
373
374 // Create a new empty descriptor.
Andrea Di Biagio7b3d1622018-03-20 12:58:34 +0000375 std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000376
377 if (SCDesc.isVariant()) {
378 errs() << "warning: don't know how to model variant opcodes.\n"
379 << "note: assume 1 micro opcode.\n";
380 ID->NumMicroOps = 1U;
381 } else {
382 ID->NumMicroOps = SCDesc.NumMicroOps;
383 }
384
385 if (MCDesc.isCall()) {
386 // We don't correctly model calls.
387 errs() << "warning: found a call in the input assembly sequence.\n"
388 << "note: call instructions are not correctly modeled. Assume a "
389 "latency of 100cy.\n";
390 }
391
392 if (MCDesc.isReturn()) {
393 errs() << "warning: found a return instruction in the input assembly "
394 "sequence.\n"
395 << "note: program counter updates are ignored.\n";
396 }
397
398 ID->MayLoad = MCDesc.mayLoad();
399 ID->MayStore = MCDesc.mayStore();
400 ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
401
402 initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
403 populateWrites(*ID, MCI, MCDesc, SCDesc, STI);
404 populateReads(*ID, MCI, MCDesc, SCDesc, STI);
405
406 DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
407 DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
408
409 // Now add the new descriptor.
Andrea Di Biagio7b3d1622018-03-20 12:58:34 +0000410 Descriptors[Opcode] = std::move(ID);
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000411}
412
Andrea Di Biagio4704f032018-03-20 12:25:54 +0000413const InstrDesc &InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
Andrea Di Biagio35622482018-03-22 10:19:20 +0000414 if (Descriptors.find_as(MCI.getOpcode()) == Descriptors.end())
Andrea Di Biagio4704f032018-03-20 12:25:54 +0000415 createInstrDescImpl(MCI);
Andrea Di Biagio35622482018-03-22 10:19:20 +0000416 return *Descriptors[MCI.getOpcode()];
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000417}
418
Andrea Di Biagio7b3d1622018-03-20 12:58:34 +0000419std::unique_ptr<Instruction>
420InstrBuilder::createInstruction(unsigned Idx, const MCInst &MCI) {
Andrea Di Biagio4704f032018-03-20 12:25:54 +0000421 const InstrDesc &D = getOrCreateInstrDesc(MCI);
Andrea Di Biagio7b3d1622018-03-20 12:58:34 +0000422 std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D);
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000423
424 // Populate Reads first.
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000425 for (const ReadDescriptor &RD : D.Reads) {
426 int RegID = -1;
427 if (RD.OpIndex != -1) {
428 // explicit read.
429 const MCOperand &Op = MCI.getOperand(RD.OpIndex);
430 // Skip non-register operands.
431 if (!Op.isReg())
432 continue;
433 RegID = Op.getReg();
434 } else {
435 // Implicit read.
436 RegID = RD.RegisterID;
437 }
438
439 // Skip invalid register operands.
440 if (!RegID)
441 continue;
442
443 // Okay, this is a register operand. Create a ReadState for it.
444 assert(RegID > 0 && "Invalid register ID found!");
Andrea Di Biagio7b3d1622018-03-20 12:58:34 +0000445 NewIS->getUses().emplace_back(llvm::make_unique<ReadState>(RD, RegID));
Andrea Di Biagio4704f032018-03-20 12:25:54 +0000446 }
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000447
448 // Now populate writes.
449 for (const WriteDescriptor &WD : D.Writes) {
450 unsigned RegID =
451 WD.OpIndex == -1 ? WD.RegisterID : MCI.getOperand(WD.OpIndex).getReg();
Andrea Di Biagio35622482018-03-22 10:19:20 +0000452 // Check if this is a optional definition that references NoReg.
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000453 if (WD.IsOptionalDef && !RegID)
454 continue;
455
Andrea Di Biagio35622482018-03-22 10:19:20 +0000456 assert(RegID && "Expected a valid register ID!");
Andrea Di Biagio7b3d1622018-03-20 12:58:34 +0000457 NewIS->getDefs().emplace_back(llvm::make_unique<WriteState>(WD, RegID));
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000458 }
459
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000460 return NewIS;
461}
Andrea Di Biagio3a6b0922018-03-08 13:05:02 +0000462} // namespace mca