blob: bf132724eaa19ba022be180b4edb52d2b1b0a264 [file] [log] [blame]
Clement Courbet37f0ca02018-05-15 12:08:00 +00001//===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
Clement Courbet37f0ca02018-05-15 12:08:00 +000010#include "Analysis.h"
Clement Courbeta66bfaa42018-05-15 13:07:05 +000011#include "BenchmarkResult.h"
Clement Courbetdf79e792018-06-01 14:18:02 +000012#include "llvm/ADT/STLExtras.h"
Clement Courbet37f0ca02018-05-15 12:08:00 +000013#include "llvm/Support/FormatVariadic.h"
Clement Courbet448550d2018-05-17 12:25:18 +000014#include <unordered_set>
Clement Courbet37f0ca02018-05-15 12:08:00 +000015#include <vector>
16
17namespace exegesis {
18
// Character that separates two fields of a CSV row.
static constexpr char kCsvSep = ',';
20
Clement Courbet17d3c252018-05-22 13:31:29 +000021namespace {
22
23enum EscapeTag { kEscapeCsv, kEscapeHtml };
24
25template <EscapeTag Tag>
26void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);
27
28template <>
29void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
Clement Courbet37f0ca02018-05-15 12:08:00 +000030 if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
31 OS << S;
32 } else {
33 // Needs escaping.
34 OS << '"';
35 for (const char C : S) {
36 if (C == '"')
37 OS << "\"\"";
38 else
39 OS << C;
40 }
41 OS << '"';
42 }
43}
44
Clement Courbet17d3c252018-05-22 13:31:29 +000045template <>
46void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
47 for (const char C : S) {
48 if (C == '<')
49 OS << "&lt;";
50 else if (C == '>')
51 OS << "&gt;";
52 else if (C == '&')
53 OS << "&amp;";
54 else
55 OS << C;
56 }
57}
58
59} // namespace
60
61template <EscapeTag Tag>
62static void
63writeClusterId(llvm::raw_ostream &OS,
64 const InstructionBenchmarkClustering::ClusterId &CID) {
65 if (CID.isNoise())
66 writeEscaped<Tag>(OS, "[noise]");
67 else if (CID.isError())
68 writeEscaped<Tag>(OS, "[error]");
69 else
70 OS << CID.getId();
71}
72
73template <EscapeTag Tag>
74static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
75 writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str());
76}
77
Clement Courbet37f0ca02018-05-15 12:08:00 +000078// Prints a row representing an instruction, along with scheduling info and
79// point coordinates (measurements).
Clement Courbet17d3c252018-05-22 13:31:29 +000080void Analysis::printInstructionRowCsv(const size_t PointId,
81 llvm::raw_ostream &OS) const {
Clement Courbet6d6c1a92018-05-16 08:47:21 +000082 const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
Clement Courbet17d3c252018-05-22 13:31:29 +000083 writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
Clement Courbet448550d2018-05-17 12:25:18 +000084 OS << kCsvSep;
Clement Courbet17d3c252018-05-22 13:31:29 +000085 writeEscaped<kEscapeCsv>(OS, Point.Key.OpcodeName);
Clement Courbeta66bfaa42018-05-15 13:07:05 +000086 OS << kCsvSep;
Clement Courbet17d3c252018-05-22 13:31:29 +000087 writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
88 OS << kCsvSep;
89 const auto OpcodeIt = MnemonicToOpcode_.find(Point.Key.OpcodeName);
90 if (OpcodeIt != MnemonicToOpcode_.end()) {
91 const unsigned SchedClassId =
92 InstrInfo_->get(OpcodeIt->second).getSchedClass();
Clement Courbet6d6c1a92018-05-16 08:47:21 +000093#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Clement Courbet17d3c252018-05-22 13:31:29 +000094 const auto &SchedModel = SubtargetInfo_->getSchedModel();
95 const llvm::MCSchedClassDesc *const SCDesc =
96 SchedModel.getSchedClassDesc(SchedClassId);
97 writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
Clement Courbet6d6c1a92018-05-16 08:47:21 +000098#else
Clement Courbet17d3c252018-05-22 13:31:29 +000099 OS << SchedClassId;
Clement Courbet6d6c1a92018-05-16 08:47:21 +0000100#endif
101 }
Clement Courbet37f0ca02018-05-15 12:08:00 +0000102 // FIXME: Print the sched class once InstructionBenchmark separates key into
103 // (mnemonic, mode, opaque).
104 for (const auto &Measurement : Point.Measurements) {
105 OS << kCsvSep;
Clement Courbet17d3c252018-05-22 13:31:29 +0000106 writeMeasurementValue<kEscapeCsv>(OS, Measurement.Value);
Clement Courbet37f0ca02018-05-15 12:08:00 +0000107 }
108 OS << "\n";
109}
110
Clement Courbet6d6c1a92018-05-16 08:47:21 +0000111Analysis::Analysis(const llvm::Target &Target,
112 const InstructionBenchmarkClustering &Clustering)
113 : Clustering_(Clustering) {
114 if (Clustering.getPoints().empty())
115 return;
116
117 InstrInfo_.reset(Target.createMCInstrInfo());
118 const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
Clement Courbet448550d2018-05-17 12:25:18 +0000119 SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
120 FirstPoint.CpuName, ""));
Clement Courbet6d6c1a92018-05-16 08:47:21 +0000121
122 // Build an index of mnemonic->opcode.
123 for (int I = 0, E = InstrInfo_->getNumOpcodes(); I < E; ++I)
124 MnemonicToOpcode_.emplace(InstrInfo_->getName(I), I);
Clement Courbet37f0ca02018-05-15 12:08:00 +0000125}
126
Clement Courbetcf210742018-05-17 13:41:28 +0000127template <>
128llvm::Error
129Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
Clement Courbet6d6c1a92018-05-16 08:47:21 +0000130 if (Clustering_.getPoints().empty())
Clement Courbet37f0ca02018-05-15 12:08:00 +0000131 return llvm::Error::success();
132
133 // Write the header.
Clement Courbeta66bfaa42018-05-15 13:07:05 +0000134 OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
135 << kCsvSep << "sched_class";
Clement Courbet6d6c1a92018-05-16 08:47:21 +0000136 for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
Clement Courbet37f0ca02018-05-15 12:08:00 +0000137 OS << kCsvSep;
Clement Courbet17d3c252018-05-22 13:31:29 +0000138 writeEscaped<kEscapeCsv>(OS, Measurement.Key);
Clement Courbet37f0ca02018-05-15 12:08:00 +0000139 }
140 OS << "\n";
141
142 // Write the points.
Clement Courbet448550d2018-05-17 12:25:18 +0000143 const auto &Clusters = Clustering_.getValidClusters();
Clement Courbet6d6c1a92018-05-16 08:47:21 +0000144 for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
145 for (const size_t PointId : Clusters[I].PointIndices) {
Clement Courbet17d3c252018-05-22 13:31:29 +0000146 printInstructionRowCsv(PointId, OS);
Clement Courbet6d6c1a92018-05-16 08:47:21 +0000147 }
Clement Courbet37f0ca02018-05-15 12:08:00 +0000148 OS << "\n\n";
149 }
150 return llvm::Error::success();
151}
152
Clement Courbet448550d2018-05-17 12:25:18 +0000153std::unordered_map<unsigned, std::vector<size_t>>
154Analysis::makePointsPerSchedClass() const {
155 std::unordered_map<unsigned, std::vector<size_t>> PointsPerSchedClass;
156 const auto &Points = Clustering_.getPoints();
157 for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
158 const InstructionBenchmark &Point = Points[PointId];
159 if (!Point.Error.empty())
160 continue;
161 const auto OpcodeIt = MnemonicToOpcode_.find(Point.Key.OpcodeName);
162 if (OpcodeIt == MnemonicToOpcode_.end())
163 continue;
164 const unsigned SchedClassId =
165 InstrInfo_->get(OpcodeIt->second).getSchedClass();
166 PointsPerSchedClass[SchedClassId].push_back(PointId);
167 }
168 return PointsPerSchedClass;
169}
170
Clement Courbet72287212018-06-04 11:11:55 +0000171void Analysis::printSchedClassClustersHtml(
172 const std::vector<SchedClassCluster> &Clusters, const SchedClass &SC,
173 llvm::raw_ostream &OS) const {
Clement Courbet17d3c252018-05-22 13:31:29 +0000174 const auto &Points = Clustering_.getPoints();
Clement Courbet2637e5f2018-05-24 10:47:05 +0000175 OS << "<table class=\"sched-class-clusters\">";
Clement Courbet17d3c252018-05-22 13:31:29 +0000176 OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
Clement Courbet72287212018-06-04 11:11:55 +0000177 assert(!Clusters.empty());
178 for (const auto &Measurement :
179 Points[Clusters[0].getPointIds()[0]].Measurements) {
Clement Courbet17d3c252018-05-22 13:31:29 +0000180 OS << "<th>";
Clement Courbetb1f1b502018-05-24 11:26:00 +0000181 if (Measurement.DebugString.empty())
182 writeEscaped<kEscapeHtml>(OS, Measurement.Key);
183 else
184 writeEscaped<kEscapeHtml>(OS, Measurement.DebugString);
Clement Courbet17d3c252018-05-22 13:31:29 +0000185 OS << "</th>";
186 }
187 OS << "</tr>";
Clement Courbet72287212018-06-04 11:11:55 +0000188 for (const SchedClassCluster &Cluster : Clusters) {
189 OS << "<tr class=\""
190 << (Cluster.measurementsMatch(*SubtargetInfo_, SC, Clustering_)
191 ? "good-cluster"
192 : "bad-cluster")
193 << "\"><td>";
194 writeClusterId<kEscapeHtml>(OS, Cluster.id());
Clement Courbet17d3c252018-05-22 13:31:29 +0000195 OS << "</td><td><ul>";
Clement Courbet72287212018-06-04 11:11:55 +0000196 for (const size_t PointId : Cluster.getPointIds()) {
197 const auto &Point = Points[PointId];
Clement Courbet17d3c252018-05-22 13:31:29 +0000198 OS << "<li><span class=\"mono\">";
Clement Courbetae8ae5dc2018-05-24 12:41:02 +0000199 writeEscaped<kEscapeHtml>(OS, Point.Key.OpcodeName);
Clement Courbet17d3c252018-05-22 13:31:29 +0000200 OS << "</span> <span class=\"mono\">";
Clement Courbetae8ae5dc2018-05-24 12:41:02 +0000201 writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
Clement Courbet17d3c252018-05-22 13:31:29 +0000202 OS << "</span></li>";
203 }
204 OS << "</ul></td>";
Clement Courbet72287212018-06-04 11:11:55 +0000205 for (const auto &Stats : Cluster.getRepresentative()) {
Clement Courbetae8ae5dc2018-05-24 12:41:02 +0000206 OS << "<td class=\"measurement\">";
207 writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
208 OS << "<br><span class=\"minmax\">[";
209 writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
210 OS << ";";
211 writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
212 OS << "]</span></td>";
Clement Courbet17d3c252018-05-22 13:31:29 +0000213 }
214 OS << "</tr>";
215 }
216 OS << "</table>";
217}
218
Clement Courbet2637e5f2018-05-24 10:47:05 +0000219// Return the non-redundant list of WriteProcRes used by the given sched class.
220// The scheduling model for LLVM is such that each instruction has a certain
221// number of uops which consume resources which are described by WriteProcRes
222// entries. Each entry describe how many cycles are spent on a specific ProcRes
223// kind.
224// For example, an instruction might have 3 uOps, one dispatching on P0
225// (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
226// Note that LLVM additionally denormalizes resource consumption to include
227// usage of super resources by subresources. So in practice if there exists a
228// P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
229// P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
230// by P06 are also consumed by P016. In the figure below, parenthesized cycles
231// denote implied usage of superresources by subresources:
232// P0 P06 P016
233// uOp1 1 (1) (1)
234// uOp2 1 (1)
235// uOp3 1 (1)
236// =============================
237// 1 3 3
238// Eventually we end up with three entries for the WriteProcRes of the
239// instruction:
240// {ProcResIdx=1, Cycles=1} // P0
241// {ProcResIdx=7, Cycles=3} // P06
242// {ProcResIdx=10, Cycles=3} // P016
243//
244// Note that in this case, P016 does not contribute any cycles, so it would
245// be removed by this function.
246// FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
247static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
248getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
249 const llvm::MCSubtargetInfo &STI) {
250 llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
251 const auto &SM = STI.getSchedModel();
252 const unsigned NumProcRes = SM.getNumProcResourceKinds();
253
254 // This assumes that the ProcResDescs are sorted in topological order, which
255 // is guaranteed by the tablegen backend.
256 llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
257 for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
258 *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
259 WPR != WPREnd; ++WPR) {
260 const llvm::MCProcResourceDesc *const ProcResDesc =
261 SM.getProcResource(WPR->ProcResourceIdx);
262 if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
263 // This is a ProcResUnit.
264 Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
265 ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
266 } else {
267 // This is a ProcResGroup. First see if it contributes any cycles or if
268 // it has cycles just from subunits.
269 float RemainingCycles = WPR->Cycles;
270 for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
271 SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
272 ++SubResIdx) {
273 RemainingCycles -= ProcResUnitUsage[*SubResIdx];
274 }
275 if (RemainingCycles < 0.01f) {
276 // The ProcResGroup contributes no cycles of its own.
277 continue;
278 }
279 // The ProcResGroup contributes `RemainingCycles` cycles of its own.
280 Result.push_back({WPR->ProcResourceIdx,
281 static_cast<uint16_t>(std::round(RemainingCycles))});
282 // Spread the remaining cycles over all subunits.
283 for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
284 SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
285 ++SubResIdx) {
286 ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
287 }
288 }
289 }
290 return Result;
291}
292
Clement Courbet72287212018-06-04 11:11:55 +0000293Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD,
294 const llvm::MCSubtargetInfo &STI)
295 : SCDesc(SD),
296 NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)),
297 IdealizedProcResPressure(computeIdealizedProcResPressure(
298 STI.getSchedModel(), NonRedundantWriteProcRes)) {}
299
300void Analysis::SchedClassCluster::addPoint(
301 size_t PointId, const InstructionBenchmarkClustering &Clustering) {
302 PointIds.push_back(PointId);
303 const auto &Point = Clustering.getPoints()[PointId];
304 if (ClusterId.isUndef()) {
305 ClusterId = Clustering.getClusterIdForPoint(PointId);
306 Representative.resize(Point.Measurements.size());
307 }
308 for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
309 Representative[I].push(Point.Measurements[I]);
310 }
311 assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
312}
313
314bool Analysis::SchedClassCluster::measurementsMatch(
315 const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
316 const InstructionBenchmarkClustering &Clustering) const {
317 const size_t NumMeasurements = Representative.size();
318 std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
319 std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
320 // Latency case.
321 assert(!Clustering.getPoints().empty());
Clement Courbet2cb97b92018-06-04 11:43:40 +0000322 const InstructionBenchmarkKey::ModeE Mode =
323 Clustering.getPoints()[0].Key.Mode;
324 if (Mode == InstructionBenchmarkKey::Latency) {
Clement Courbet72287212018-06-04 11:11:55 +0000325 if (NumMeasurements != 1) {
326 llvm::errs()
327 << "invalid number of measurements in latency mode: expected 1, got "
328 << NumMeasurements << "\n";
329 return false;
330 }
331 // Find the latency.
332 SchedClassPoint[0].Value = 0.0;
333 for (unsigned I = 0; I < SC.SCDesc.NumWriteLatencyEntries; ++I) {
334 const llvm::MCWriteLatencyEntry *const WLE =
335 STI.getWriteLatencyEntry(&SC.SCDesc, I);
336 SchedClassPoint[0].Value =
337 std::max<double>(SchedClassPoint[0].Value, WLE->Cycles);
338 }
339 ClusterCenterPoint[0].Value = Representative[0].avg();
Clement Courbet2cb97b92018-06-04 11:43:40 +0000340 } else if (Mode == InstructionBenchmarkKey::Uops) {
Clement Courbet72287212018-06-04 11:11:55 +0000341 for (int I = 0, E = Representative.size(); I < E; ++I) {
342 // Find the pressure on ProcResIdx `Key`.
343 uint16_t ProcResIdx = 0;
344 if (!llvm::to_integer(Representative[I].key(), ProcResIdx, 10)) {
345 llvm::errs() << "expected ProcResIdx key, got "
346 << Representative[I].key() << "\n";
347 return false;
348 }
349 const auto ProcResPressureIt =
350 std::find_if(SC.IdealizedProcResPressure.begin(),
351 SC.IdealizedProcResPressure.end(),
352 [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
353 return WPR.first == ProcResIdx;
354 });
355 SchedClassPoint[I].Value =
356 ProcResPressureIt == SC.IdealizedProcResPressure.end()
357 ? 0.0
358 : ProcResPressureIt->second;
359 ClusterCenterPoint[I].Value = Representative[I].avg();
360 }
361 } else {
Clement Courbet2cb97b92018-06-04 11:43:40 +0000362 llvm::errs() << "unimplemented measurement matching for mode " << Mode
363 << "\n";
Clement Courbet72287212018-06-04 11:11:55 +0000364 return false;
365 }
366 return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
367}
368
369void Analysis::printSchedClassDescHtml(const SchedClass &SC,
Clement Courbet2637e5f2018-05-24 10:47:05 +0000370 llvm::raw_ostream &OS) const {
371 OS << "<table class=\"sched-class-desc\">";
372 OS << "<tr><th>Valid</th><th>Variant</th><th>uOps</th><th>Latency</"
Clement Courbetdf79e792018-06-01 14:18:02 +0000373 "th><th>WriteProcRes</th><th title=\"This is the idealized unit "
374 "resource (port) pressure assuming ideal distribution\">Idealized "
375 "Resource Pressure</th></tr>";
Clement Courbet72287212018-06-04 11:11:55 +0000376 if (SC.SCDesc.isValid()) {
Clement Courbetdf79e792018-06-01 14:18:02 +0000377 const auto &SM = SubtargetInfo_->getSchedModel();
Clement Courbet2637e5f2018-05-24 10:47:05 +0000378 OS << "<tr><td>&#10004;</td>";
Clement Courbet72287212018-06-04 11:11:55 +0000379 OS << "<td>" << (SC.SCDesc.isVariant() ? "&#10004;" : "&#10005;")
380 << "</td>";
381 OS << "<td>" << SC.SCDesc.NumMicroOps << "</td>";
Clement Courbet2637e5f2018-05-24 10:47:05 +0000382 // Latencies.
383 OS << "<td><ul>";
Clement Courbet72287212018-06-04 11:11:55 +0000384 for (int I = 0, E = SC.SCDesc.NumWriteLatencyEntries; I < E; ++I) {
Clement Courbet2637e5f2018-05-24 10:47:05 +0000385 const auto *const Entry =
Clement Courbet72287212018-06-04 11:11:55 +0000386 SubtargetInfo_->getWriteLatencyEntry(&SC.SCDesc, I);
Clement Courbet2637e5f2018-05-24 10:47:05 +0000387 OS << "<li>" << Entry->Cycles;
Clement Courbet72287212018-06-04 11:11:55 +0000388 if (SC.SCDesc.NumWriteLatencyEntries > 1) {
Clement Courbet2637e5f2018-05-24 10:47:05 +0000389 // Dismabiguate if more than 1 latency.
390 OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
391 }
392 OS << "</li>";
393 }
394 OS << "</ul></td>";
395 // WriteProcRes.
396 OS << "<td><ul>";
Clement Courbet72287212018-06-04 11:11:55 +0000397 for (const auto &WPR : SC.NonRedundantWriteProcRes) {
Clement Courbetdf79e792018-06-01 14:18:02 +0000398 OS << "<li><span class=\"mono\">";
399 writeEscaped<kEscapeHtml>(OS,
400 SM.getProcResource(WPR.ProcResourceIdx)->Name);
401 OS << "</span>: " << WPR.Cycles << "</li>";
402 }
403 OS << "</ul></td>";
404 // Idealized port pressure.
405 OS << "<td><ul>";
Clement Courbet72287212018-06-04 11:11:55 +0000406 for (const auto &Pressure : SC.IdealizedProcResPressure) {
Clement Courbet2637e5f2018-05-24 10:47:05 +0000407 OS << "<li><span class=\"mono\">";
408 writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
Clement Courbetdf79e792018-06-01 14:18:02 +0000409 .getProcResource(Pressure.first)
Clement Courbet2637e5f2018-05-24 10:47:05 +0000410 ->Name);
Clement Courbetdf79e792018-06-01 14:18:02 +0000411 OS << "</span>: ";
412 writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
413 OS << "</li>";
Clement Courbet2637e5f2018-05-24 10:47:05 +0000414 }
415 OS << "</ul></td>";
416 OS << "</tr>";
417 } else {
418 OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
419 }
420 OS << "</table>";
421}
422
// Markup of the <head> element (page title and inline stylesheet) of the HTML
// report. run<PrintSchedClassInconsistencies> writes it right after the
// opening <html> tag. The stylesheet defines, among others, the
// "sched-class-clusters" and "sched-class-desc" table classes and the
// "good-cluster" / "bad-cluster" row classes used by the HTML printers.
Clement Courbet17d3c252018-05-22 13:31:29 +0000423static constexpr const char kHtmlHead[] = R"(
424<head>
425<title>llvm-exegesis Analysis Results</title>
426<style>
427body {
428 font-family: sans-serif
429}
430span.sched-class-name {
431 font-weight: bold;
432 font-family: monospace;
433}
434span.opcode {
435 font-family: monospace;
436}
437span.config {
438 font-family: monospace;
439}
440div.inconsistency {
441 margin-top: 50px;
442}
Clement Courbet2637e5f2018-05-24 10:47:05 +0000443table {
Clement Courbet17d3c252018-05-22 13:31:29 +0000444 margin-left: 50px;
445 border-collapse: collapse;
446}
Clement Courbet2637e5f2018-05-24 10:47:05 +0000447table, table tr,td,th {
Clement Courbet17d3c252018-05-22 13:31:29 +0000448 border: 1px solid #444;
449}
Clement Courbet2637e5f2018-05-24 10:47:05 +0000450table ul {
451 padding-left: 0px;
452 margin: 0px;
453 list-style-type: none;
454}
455table.sched-class-clusters td {
Clement Courbet17d3c252018-05-22 13:31:29 +0000456 padding-left: 10px;
457 padding-right: 10px;
458 padding-top: 10px;
459 padding-bottom: 10px;
460}
Clement Courbet2637e5f2018-05-24 10:47:05 +0000461table.sched-class-desc td {
462 padding-left: 10px;
463 padding-right: 10px;
464 padding-top: 2px;
465 padding-bottom: 2px;
Clement Courbet17d3c252018-05-22 13:31:29 +0000466}
467span.mono {
468 font-family: monospace;
469}
Clement Courbetae8ae5dc2018-05-24 12:41:02 +0000470td.measurement {
471 text-align: center;
472}
Clement Courbet72287212018-06-04 11:11:55 +0000473tr.good-cluster td.measurement {
474 color: #292
475}
476tr.bad-cluster td.measurement {
477 color: #922
478}
479tr.good-cluster td.measurement span.minmax {
480 color: #888;
481}
482tr.bad-cluster td.measurement span.minmax {
483 color: #888;
484}
Clement Courbet17d3c252018-05-22 13:31:29 +0000485</style>
486</head>
487)";
488
Clement Courbetcf210742018-05-17 13:41:28 +0000489template <>
490llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
491 llvm::raw_ostream &OS) const {
Clement Courbet72287212018-06-04 11:11:55 +0000492 const auto &FirstPoint = Clustering_.getPoints()[0];
Clement Courbet17d3c252018-05-22 13:31:29 +0000493 // Print the header.
494 OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
495 OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
496 OS << "<h3>Triple: <span class=\"mono\">";
Clement Courbet72287212018-06-04 11:11:55 +0000497 writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
Clement Courbet17d3c252018-05-22 13:31:29 +0000498 OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
Clement Courbet72287212018-06-04 11:11:55 +0000499 writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
Clement Courbet17d3c252018-05-22 13:31:29 +0000500 OS << "</span></h3>";
501
Clement Courbet448550d2018-05-17 12:25:18 +0000502 for (const auto &SchedClassAndPoints : makePointsPerSchedClass()) {
Clement Courbet72287212018-06-04 11:11:55 +0000503 const auto SchedClassId = SchedClassAndPoints.first;
504 const std::vector<size_t> &SchedClassPoints = SchedClassAndPoints.second;
Clement Courbet448550d2018-05-17 12:25:18 +0000505 const auto &SchedModel = SubtargetInfo_->getSchedModel();
506 const llvm::MCSchedClassDesc *const SCDesc =
Clement Courbet72287212018-06-04 11:11:55 +0000507 SchedModel.getSchedClassDesc(SchedClassId);
Clement Courbet2637e5f2018-05-24 10:47:05 +0000508 if (!SCDesc)
509 continue;
Clement Courbet72287212018-06-04 11:11:55 +0000510 const SchedClass SC(*SCDesc, *SubtargetInfo_);
511
512 // Bucket sched class points into sched class clusters.
513 std::vector<SchedClassCluster> SchedClassClusters;
514 for (const size_t PointId : SchedClassPoints) {
515 const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
516 if (!ClusterId.isValid())
517 continue; // Ignore noise and errors. FIXME: take noise into account ?
518 auto SchedClassClusterIt =
519 std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
520 [ClusterId](const SchedClassCluster &C) {
521 return C.id() == ClusterId;
522 });
523 if (SchedClassClusterIt == SchedClassClusters.end()) {
524 SchedClassClusters.emplace_back();
525 SchedClassClusterIt = std::prev(SchedClassClusters.end());
526 }
527 SchedClassClusterIt->addPoint(PointId, Clustering_);
528 }
529
530 // Print any scheduling class that has at least one cluster that does not
531 // match the checked-in data.
532 if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(),
533 [this, &SC](const SchedClassCluster &C) {
534 return C.measurementsMatch(*SubtargetInfo_, SC,
535 Clustering_);
536 }))
537 continue; // Nothing weird.
538
Clement Courbet2637e5f2018-05-24 10:47:05 +0000539 OS << "<div class=\"inconsistency\"><p>Sched Class <span "
540 "class=\"sched-class-name\">";
541#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Clement Courbet17d3c252018-05-22 13:31:29 +0000542 writeEscaped<kEscapeHtml>(OS, SCDesc->Name);
Clement Courbet448550d2018-05-17 12:25:18 +0000543#else
Clement Courbet72287212018-06-04 11:11:55 +0000544 OS << SchedClassId;
Clement Courbet448550d2018-05-17 12:25:18 +0000545#endif
Clement Courbet72287212018-06-04 11:11:55 +0000546 OS << "</span> contains instructions whose performance characteristics do"
547 " not match that of LLVM:</p>";
548 printSchedClassClustersHtml(SchedClassClusters, SC, OS);
549 OS << "<p>llvm SchedModel data:</p>";
550 printSchedClassDescHtml(SC, OS);
Clement Courbet17d3c252018-05-22 13:31:29 +0000551 OS << "</div>";
Clement Courbet448550d2018-05-17 12:25:18 +0000552 }
Clement Courbet17d3c252018-05-22 13:31:29 +0000553
554 OS << "</body></html>";
Clement Courbet448550d2018-05-17 12:25:18 +0000555 return llvm::Error::success();
556}
557
Clement Courbetdf79e792018-06-01 14:18:02 +0000558// Distributes a pressure budget as evenly as possible on the provided subunits
559// given the already existing port pressure distribution.
560//
561// The algorithm is as follows: while there is remaining pressure to
562// distribute, find the subunits with minimal pressure, and distribute
563// remaining pressure equally up to the pressure of the unit with
564// second-to-minimal pressure.
565// For example, let's assume we want to distribute 2*P1256
566// (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
567// DensePressure = P0 P1 P2 P3 P4 P5 P6 P7
568// 0.1 0.3 0.2 0.0 0.0 0.5 0.5 0.5
569// RemainingPressure = 2.0
570// We sort the subunits by pressure:
571// Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
572// We'll first start by the subunits with minimal pressure, which are at
573// the beginning of the sorted array. In this example there is one (P2).
574// The subunit with second-to-minimal pressure is the next one in the
575// array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
576// from the budget.
577// Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)]
578// RemainingPressure = 1.9
579// We repeat this process: distribute 0.2 pressure on each of the minimal
580// P2 and P1, decrease budget by 2*0.2:
581// Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)]
582// RemainingPressure = 1.5
583// There are no second-to-minimal subunits so we just share the remaining
584// budget (1.5 cycles) equally:
585// Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)]
586// RemainingPressure = 0.0
587// We stop as there is no remaining budget to distribute.
588void distributePressure(float RemainingPressure,
589 llvm::SmallVector<uint16_t, 32> Subunits,
590 llvm::SmallVector<float, 32> &DensePressure) {
591 // Find the number of subunits with minimal pressure (they are at the
592 // front).
593 llvm::sort(Subunits.begin(), Subunits.end(),
594 [&DensePressure](const uint16_t A, const uint16_t B) {
595 return DensePressure[A] < DensePressure[B];
596 });
597 const auto getPressureForSubunit = [&DensePressure,
598 &Subunits](size_t I) -> float & {
599 return DensePressure[Subunits[I]];
600 };
601 size_t NumMinimalSU = 1;
602 while (NumMinimalSU < Subunits.size() &&
603 getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
604 ++NumMinimalSU;
605 }
606 while (RemainingPressure > 0.0f) {
607 if (NumMinimalSU == Subunits.size()) {
608 // All units are minimal, just distribute evenly and be done.
609 for (size_t I = 0; I < NumMinimalSU; ++I) {
610 getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
611 }
612 return;
613 }
614 // Distribute the remaining pressure equally.
615 const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
616 const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
617 assert(MinimalPressure < SecondToMinimalPressure);
618 const float Increment = SecondToMinimalPressure - MinimalPressure;
619 if (RemainingPressure <= NumMinimalSU * Increment) {
620 // There is not enough remaining pressure.
621 for (size_t I = 0; I < NumMinimalSU; ++I) {
622 getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
623 }
624 return;
625 }
626 // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
627 for (size_t I = 0; I < NumMinimalSU; ++I) {
628 getPressureForSubunit(I) = SecondToMinimalPressure;
629 RemainingPressure -= SecondToMinimalPressure;
630 }
631 while (NumMinimalSU < Subunits.size() &&
632 getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
633 ++NumMinimalSU;
634 }
635 }
636}
637
638std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
639 const llvm::MCSchedModel &SM,
640 llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
641 // DensePressure[I] is the port pressure for Proc Resource I.
642 llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
643 llvm::sort(WPRS.begin(), WPRS.end(),
644 [](const llvm::MCWriteProcResEntry &A,
645 const llvm::MCWriteProcResEntry &B) {
646 return A.ProcResourceIdx < B.ProcResourceIdx;
647 });
648 for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
649 // Get units for the entry.
650 const llvm::MCProcResourceDesc *const ProcResDesc =
651 SM.getProcResource(WPR.ProcResourceIdx);
652 if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
653 // This is a ProcResUnit.
654 DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
655 } else {
656 // This is a ProcResGroup.
657 llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
658 ProcResDesc->SubUnitsIdxBegin +
659 ProcResDesc->NumUnits);
660 distributePressure(WPR.Cycles, Subunits, DensePressure);
661 }
662 }
663 // Turn dense pressure into sparse pressure by removing zero entries.
664 std::vector<std::pair<uint16_t, float>> Pressure;
665 for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
666 if (DensePressure[I] > 0.0f)
667 Pressure.emplace_back(I, DensePressure[I]);
668 }
669 return Pressure;
670}
671
Clement Courbet37f0ca02018-05-15 12:08:00 +0000672} // namespace exegesis