//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "InstrBuilder.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca"

namespace mca {

using namespace llvm;

static void
initializeUsedResources(InstrDesc &ID, const MCSchedClassDesc &SCDesc,
                        const MCSubtargetInfo &STI,
                        const ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  std::vector<ResourcePlusCycles> Worklist;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize != -1)
      ID.Buffers.push_back(Mask);
    CycleSegment RCy(0, PRE->Cycles, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
  }

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
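  //
  // As an illustration with hypothetical masks: if the units HWPort0 and
  // HWPort1 get masks 0x01 and 0x02, and the group HWPort01 gets mask 0x83
  // (its own ID bit 0x80 plus the bits of its two units), then the two units
  // (popcount 1) sort before the group (popcount 3).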
  std::sort(Worklist.begin(), Worklist.end(),
            [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
              unsigned popcntA = countPopulation(A.first);
              unsigned popcntB = countPopulation(B.first);
              if (popcntA < popcntB)
                return true;
              if (popcntA > popcntB)
                return false;
              return A.first < B.first;
            });

  uint64_t UsedResourceUnits = 0;

  // Remove cycles contributed by smaller resources. If a resource A is fully
  // contained in a resource group B, then the cycles spent on A are
  // subtracted from B, so that B only accounts for the extra cycles.
  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      A.second.NumUnits = 0;
      A.second.setReserved();
      ID.Resources.emplace_back(A);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;
    if (countPopulation(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask. The most
      // significant bit is the group's own ID; the remaining bits identify
      // the units which are part of the group.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.Subtract(A.second.size());
        if (countPopulation(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //   SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //     let ResourceCycles = [2, 2, 3];
  //   }
  //
  // This means:
  // Resource units HWPort0 and HWPort1 are both used for 2cy.
  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  // will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2-cycle latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
      if ((Mask & UsedResourceUnits) == Mask)
        RPC.second.setReserved();
    }
  }

  DEBUG(
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n';
    for (const uint64_t R : ID.Buffers)
      dbgs() << "\t\tBuffer Mask=" << R << '\n';
  );
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency (100cy).
    ID.MaxLatency = 100U;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}

static void populateWrites(InstrDesc &ID, const MCInst &MCI,
                           const MCInstrDesc &MCDesc,
                           const MCSchedClassDesc &SCDesc,
                           const MCSubtargetInfo &STI) {
  computeMaxLatency(ID, MCDesc, SCDesc, STI);

  // Set if writes through this opcode may update super registers.
  // TODO: on x86-64, a 4 byte write of a general purpose register always
  // fully updates the super-register.
  // More generally, (at least on x86) not all register writes perform
  // a partial (super-)register update.
  // For example, an AVX instruction that writes to an XMM register implicitly
  // zeroes the upper half of every aliasing super-register.
  //
  // For now, we pessimistically assume that writes are all potentially
  // partial register updates. This is a good default for most targets, except
  // for those like x86 which implement a special semantic for certain
  // opcodes. At least on x86, this may lead to an inaccurate prediction of
  // the instruction level parallelism.
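  //
  // A concrete x86-64 illustration of that asymmetry: the SSE instruction
  //   addps %xmm1, %xmm0
  // leaves the upper 128 bits of the aliasing YMM0 register unmodified (a
  // partial update), while its AVX counterpart
  //   vaddps %xmm1, %xmm0, %xmm0
  // zeroes those upper bits (a full update of YMM0).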
  bool FullyUpdatesSuperRegisters = false;

  // Now populate writes.

  // This algorithm currently works under the strong (and potentially
  // incorrect) assumption that information related to register def/uses can
  // be obtained from MCInstrDesc.
  //
  // However, class MCInstrDesc is used to describe MachineInstr objects and
  // not MCInst objects. To be more specific, MCInstrDesc objects are opcode
  // descriptors that are automatically generated via tablegen based on the
  // instruction set information available from the target .td files. That
  // means the number of (explicit) definitions according to MCInstrDesc
  // always matches the cardinality of the `(outs)` set in tablegen.
  //
  // By construction, definitions must appear first in the operand sequence of
  // a MachineInstr. Also, the (outs) sequence is preserved (example: the
  // first element in the outs set is the first operand in the corresponding
  // MachineInstr). That's the reason why MCInstrDesc only needs to declare
  // the total number of register definitions, and not where those
  // definitions are in the machine operand sequence.
  //
  // Unfortunately, it is not safe to use the information from MCInstrDesc to
  // also describe MCInst objects. An MCInst object can be obtained from a
  // MachineInstr through a lowering step which may restructure the operand
  // sequence (and even remove or introduce new operands). So, there is a
  // high risk that the lowering step breaks the assumption that register
  // definitions are always at the beginning of the machine operand sequence.
  //
  // This is a fundamental problem, and it is still open. Essentially, we
  // have to find a way to correlate def/use operands of a MachineInstr to
  // operands of an MCInst. Otherwise, we cannot correctly reconstruct data
  // dependencies, nor can we correctly interpret the scheduling model, which
  // heavily uses machine operand indices to define processor read-advance
  // information, and to identify processor write resources. Essentially, we
  // either need something like an MCInstrDesc, but for MCInst, or a way to
  // map MCInst operands back to MachineInstr operands.
  //
  // Unfortunately, we don't have that information now. So, this prototype
  // currently works under the strong assumption that we can always safely
  // trust the content of an MCInstrDesc. For example, we can query an
  // MCInstrDesc to obtain the number of explicit and implicit register
  // definitions. We also assume that register definitions always come first
  // in the operand sequence. This last assumption usually makes sense for
  // MachineInstr, where register definitions always appear at the beginning
  // of the operands sequence. In reality, these assumptions could be broken
  // by the lowering step, which can decide to lay out operands in a
  // different order than the original order specified by the MachineInstr.
  //
  // Things get even more complicated in the presence of "optional" register
  // definitions. For MachineInstr, optional register definitions are always
  // at the end of the operand sequence. Some ARM instructions that may
  // update the status flags specify that register as an optional operand.
  // Since we don't have operand descriptors for MCInst, we assume for now
  // that the optional definition is always the last operand of an MCInst.
  // Again, this assumption may be okay for most targets. However, there is
  // no guarantee that targets would respect it.
  //
  // In conclusion, these are for now the strong assumptions made by the tool:
  //  * The number of explicit and implicit register definitions in an MCInst
  //    matches the number of explicit and implicit definitions according to
  //    the opcode descriptor (MCInstrDesc).
  //  * Register definitions take precedence over register uses in the
  //    operands list.
  //  * If an opcode specifies an optional definition, then the optional
  //    definition is always the last operand in the sequence, and it can be
  //    set to zero (i.e. "no register").
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // In the longer term, we need to find a proper solution for this issue.
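  //
  // To illustrate the assumptions above (the specific opcodes are just
  // examples): for the x86-64 MCInst of `addq %rsi, %rdi`, the opcode
  // descriptor reports one explicit definition (the destination register,
  // expected to be operand 0 of the MCInst) plus one implicit definition
  // (EFLAGS). An ARM data-processing instruction that may update the status
  // flags carries CPSR as an optional definition, which this code expects to
  // be the last MCInst operand (or register 0 when the flags are unwritten).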
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;
  ID.Writes.resize(TotalDefs);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency = WLE.Cycles == -1 ? ID.MaxLatency : WLE.Cycles;
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.FullyUpdatesSuperRegs = FullyUpdatesSuperRegisters;
    Write.IsOptionalDef = false;
    DEBUG(
      dbgs() << "\t\tOpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    );
    CurrentDef++;
  }

  if (CurrentDef != NumExplicitDefs)
    llvm::report_fatal_error(
        "error: Expected more register operand definitions.");

  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = -1;
    Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    DEBUG(dbgs() << "\t\tOpIdx=" << Write.OpIndex
                 << ", PhysReg=" << Write.RegisterID
                 << ", Latency=" << Write.Latency
                 << ", WriteResourceID=" << Write.SClassOrWriteResourceID
                 << '\n');
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand of the
    // MCInst sequence.
    const MCOperand &Op = MCI.getOperand(MCI.getNumOperands() - 1);
    if (i == MCI.getNumOperands() || !Op.isReg())
      llvm::report_fatal_error(
          "error: expected a register operand for an optional "
          "definition. Instruction has not been correctly analyzed.\n",
          false);

    WriteDescriptor &Write = ID.Writes[TotalDefs - 1];
    Write.OpIndex = MCI.getNumOperands() - 1;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
  }
}

static void populateReads(InstrDesc &ID, const MCInst &MCI,
                          const MCInstrDesc &MCDesc,
                          const MCSchedClassDesc &SCDesc,
                          const MCSubtargetInfo &STI) {
  unsigned SchedClassID = MCDesc.getSchedClass();
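  // A scheduling class may define ReadAdvance entries. A ReadAdvance lets a
  // read see the result of a predecessor write a number of cycles earlier
  // than the write's nominal latency (for example, thanks to a forwarding
  // network). Here we only record whether such entries exist, plus the
  // scheduling class needed to query them; the cycle adjustment itself is
  // presumably applied later, when reads are matched with the writes they
  // depend on.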
  bool HasReadAdvanceEntries = SCDesc.NumReadAdvanceEntries > 0;

  unsigned i = 0;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  // Skip explicit definitions.
  for (; i < MCI.getNumOperands() && NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (Op.isReg())
      NumExplicitDefs--;
  }

  if (NumExplicitDefs)
    llvm::report_fatal_error(
        "error: Expected more register operand definitions.", false);

  unsigned NumExplicitUses = MCI.getNumOperands() - i;
  unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
  if (MCDesc.hasOptionalDef()) {
    assert(NumExplicitUses);
    NumExplicitUses--;
  }
  unsigned TotalUses = NumExplicitUses + NumImplicitUses;
  if (!TotalUses)
    return;

  ID.Reads.resize(TotalUses);
  for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) {
    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = i + CurrentUse;
    Read.HasReadAdvanceEntries = HasReadAdvanceEntries;
    Read.SchedClassID = SchedClassID;
    DEBUG(dbgs() << "\t\tOpIdx=" << Read.OpIndex << '\n');
  }

  for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) {
    ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse];
    Read.OpIndex = -1;
    Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse];
    Read.HasReadAdvanceEntries = false;
    Read.SchedClassID = SchedClassID;
    DEBUG(dbgs() << "\t\tOpIdx=" << Read.OpIndex
                 << ", RegisterID=" << Read.RegisterID << '\n');
  }
}

void InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  unsigned short Opcode = MCI.getOpcode();
  // Obtain the instruction descriptor from the opcode.
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  const MCSchedClassDesc &SCDesc =
      *SM.getSchedClassDesc(MCDesc.getSchedClass());

  // Create a new empty descriptor.
  InstrDesc *ID = new InstrDesc();

  if (SCDesc.isVariant()) {
    errs() << "warning: don't know how to model variant opcodes.\n"
           << "note: assume 1 micro opcode.\n";
    ID->NumMicroOps = 1U;
  } else {
    ID->NumMicroOps = SCDesc.NumMicroOps;
  }

  if (MCDesc.isCall()) {
    // We don't correctly model calls.
    errs() << "warning: found a call in the input assembly sequence.\n"
           << "note: call instructions are not correctly modeled. Assume a "
              "latency of 100cy.\n";
  }

  if (MCDesc.isReturn()) {
    errs() << "warning: found a return instruction in the input assembly "
              "sequence.\n"
           << "note: program counter updates are ignored.\n";
  }

  ID->MayLoad = MCDesc.mayLoad();
  ID->MayStore = MCDesc.mayStore();
  ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  populateWrites(*ID, MCI, MCDesc, SCDesc, STI);
  populateReads(*ID, MCI, MCDesc, SCDesc, STI);

  DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Now add the new descriptor.
  Descriptors[Opcode] = std::unique_ptr<const InstrDesc>(ID);
}

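// Descriptor lookup is memoized by opcode: the first query for an opcode
// builds its InstrDesc via createInstrDescImpl(), and every later query
// returns the cached object. Note that this implicitly assumes all instances
// of an opcode behave the same way; variant scheduling classes (see the
// warning above) are not resolved on a per-instruction basis.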
const InstrDesc &InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
  auto it = Descriptors.find(MCI.getOpcode());
  if (it == Descriptors.end())
    createInstrDescImpl(MCI);
  return *Descriptors[MCI.getOpcode()].get();
}

Instruction *InstrBuilder::createInstruction(unsigned Idx, const MCInst &MCI) {
  const InstrDesc &D = getOrCreateInstrDesc(MCI);
  Instruction *NewIS = new Instruction(D);

  // Populate Reads first.
  for (const ReadDescriptor &RD : D.Reads) {
    int RegID = -1;
    if (RD.OpIndex != -1) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    assert(RegID > 0 && "Invalid register ID found!");
    ReadState *NewRDS = new ReadState(RD, RegID);
    NewIS->getUses().emplace_back(std::unique_ptr<ReadState>(NewRDS));
  }

  // Now populate writes.
  for (const WriteDescriptor &WD : D.Writes) {
    unsigned RegID =
        WD.OpIndex == -1 ? WD.RegisterID : MCI.getOperand(WD.OpIndex).getReg();
    assert((RegID || WD.IsOptionalDef) && "Expected a valid register ID!");
    // Special case where this is an optional definition, and the actual
    // register is 0.
    if (WD.IsOptionalDef && !RegID)
      continue;

    WriteState *NewWS = new WriteState(WD);
    NewIS->getDefs().emplace_back(std::unique_ptr<WriteState>(NewWS));
    NewWS->setRegisterID(RegID);
  }

  return NewIS;
}
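
// A minimal usage sketch for the builder (hypothetical driver code; the
// names `IB` and `Insts` are assumptions, not part of this interface):
//
//   InstrBuilder &IB = ...; // Constructed from the target's STI and MCII.
//   for (unsigned Idx = 0, E = Insts.size(); Idx < E; ++Idx) {
//     std::unique_ptr<Instruction> Inst(IB.createInstruction(Idx, Insts[Idx]));
//     // ... hand the Instruction over to the simulated pipeline ...
//   }
//
// Descriptors are shared by all instances of the same opcode, while each
// Instruction owns its per-instance read/write states.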

} // namespace mca