Blame - llvm/tools/llvm-exegesis/lib/SchedClassResolution.cpp - toolchain/llvm-project

blob: f73759112357e93c19d183f25f9ab461ca360c84 [file] [log] [blame]

Roman Lebedev	1d1330c	2019-03-29 14:24:27 +0000	[diff] [blame]	1	//===-- SchedClassResolution.cpp --------------------------------- C++ --===//
				2	//
				3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				6	//
				7	//===----------------------------------------------------------------------===//
				8
				9	#include "SchedClassResolution.h"
				10	#include "BenchmarkResult.h"
				11	#include "llvm/ADT/STLExtras.h"
				12	#include "llvm/MC/MCAsmInfo.h"
				13	#include "llvm/Support/FormatVariadic.h"
				14	#include <limits>
				15	#include <unordered_set>
				16	#include <vector>
				17
				18	namespace llvm {
				19	namespace exegesis {
				20
				21	// Return the non-redundant list of WriteProcRes used by the given sched class.
				22	// The scheduling model for LLVM is such that each instruction has a certain
				23	// number of uops which consume resources which are described by WriteProcRes
				24	// entries. Each entry describe how many cycles are spent on a specific ProcRes
				25	// kind.
				26	// For example, an instruction might have 3 uOps, one dispatching on P0
				27	// (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
				28	// Note that LLVM additionally denormalizes resource consumption to include
				29	// usage of super resources by subresources. So in practice if there exists a
				30	// P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
				31	// P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
				32	// by P06 are also consumed by P016. In the figure below, parenthesized cycles
				33	// denote implied usage of superresources by subresources:
				34	// P0 P06 P016
				35	// uOp1 1 (1) (1)
				36	// uOp2 1 (1)
				37	// uOp3 1 (1)
				38	// =============================
				39	// 1 3 3
				40	// Eventually we end up with three entries for the WriteProcRes of the
				41	// instruction:
				42	// {ProcResIdx=1, Cycles=1} // P0
				43	// {ProcResIdx=7, Cycles=3} // P06
				44	// {ProcResIdx=10, Cycles=3} // P016
				45	//
				46	// Note that in this case, P016 does not contribute any cycles, so it would
				47	// be removed by this function.
				48	// FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
				49	static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
				50	getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
				51	const llvm::MCSubtargetInfo &STI) {
				52	llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
				53	const auto &SM = STI.getSchedModel();
				54	const unsigned NumProcRes = SM.getNumProcResourceKinds();
				55
				56	// This assumes that the ProcResDescs are sorted in topological order, which
				57	// is guaranteed by the tablegen backend.
				58	llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
				59	for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
				60	*const WPREnd = STI.getWriteProcResEnd(&SCDesc);
				61	WPR != WPREnd; ++WPR) {
				62	const llvm::MCProcResourceDesc *const ProcResDesc =
				63	SM.getProcResource(WPR->ProcResourceIdx);
				64	if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
				65	// This is a ProcResUnit.
				66	Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
				67	ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
				68	} else {
				69	// This is a ProcResGroup. First see if it contributes any cycles or if
				70	// it has cycles just from subunits.
				71	float RemainingCycles = WPR->Cycles;
				72	for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
				73	SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
				74	++SubResIdx) {
				75	RemainingCycles -= ProcResUnitUsage[*SubResIdx];
				76	}
				77	if (RemainingCycles < 0.01f) {
				78	// The ProcResGroup contributes no cycles of its own.
				79	continue;
				80	}
				81	// The ProcResGroup contributes `RemainingCycles` cycles of its own.
				82	Result.push_back({WPR->ProcResourceIdx,
				83	static_cast<uint16_t>(std::round(RemainingCycles))});
				84	// Spread the remaining cycles over all subunits.
				85	for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
				86	SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
				87	++SubResIdx) {
				88	ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
				89	}
				90	}
				91	}
				92	return Result;
				93	}
				94
				95	// Distributes a pressure budget as evenly as possible on the provided subunits
				96	// given the already existing port pressure distribution.
				97	//
				98	// The algorithm is as follows: while there is remaining pressure to
				99	// distribute, find the subunits with minimal pressure, and distribute
				100	// remaining pressure equally up to the pressure of the unit with
				101	// second-to-minimal pressure.
				102	// For example, let's assume we want to distribute 2*P1256
				103	// (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
				104	// DensePressure = P0 P1 P2 P3 P4 P5 P6 P7
				105	// 0.1 0.3 0.2 0.0 0.0 0.5 0.5 0.5
				106	// RemainingPressure = 2.0
				107	// We sort the subunits by pressure:
				108	// Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
				109	// We'll first start by the subunits with minimal pressure, which are at
				110	// the beginning of the sorted array. In this example there is one (P2).
				111	// The subunit with second-to-minimal pressure is the next one in the
				112	// array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
				113	// from the budget.
				114	// Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)]
				115	// RemainingPressure = 1.9
				116	// We repeat this process: distribute 0.2 pressure on each of the minimal
				117	// P2 and P1, decrease budget by 2*0.2:
				118	// Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)]
				119	// RemainingPressure = 1.5
				120	// There are no second-to-minimal subunits so we just share the remaining
				121	// budget (1.5 cycles) equally:
				122	// Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)]
				123	// RemainingPressure = 0.0
				124	// We stop as there is no remaining budget to distribute.
				125	static void distributePressure(float RemainingPressure,
				126	llvm::SmallVector<uint16_t, 32> Subunits,
				127	llvm::SmallVector<float, 32> &DensePressure) {
				128	// Find the number of subunits with minimal pressure (they are at the
				129	// front).
				130	llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) {
				131	return DensePressure[A] < DensePressure[B];
				132	});
				133	const auto getPressureForSubunit = [&DensePressure,
				134	&Subunits](size_t I) -> float & {
				135	return DensePressure[Subunits[I]];
				136	};
				137	size_t NumMinimalSU = 1;
				138	while (NumMinimalSU < Subunits.size() &&
				139	getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
				140	++NumMinimalSU;
				141	}
				142	while (RemainingPressure > 0.0f) {
				143	if (NumMinimalSU == Subunits.size()) {
				144	// All units are minimal, just distribute evenly and be done.
				145	for (size_t I = 0; I < NumMinimalSU; ++I) {
				146	getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
				147	}
				148	return;
				149	}
				150	// Distribute the remaining pressure equally.
				151	const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
				152	const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
				153	assert(MinimalPressure < SecondToMinimalPressure);
				154	const float Increment = SecondToMinimalPressure - MinimalPressure;
				155	if (RemainingPressure <= NumMinimalSU * Increment) {
				156	// There is not enough remaining pressure.
				157	for (size_t I = 0; I < NumMinimalSU; ++I) {
				158	getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
				159	}
				160	return;
				161	}
				162	// Bump all minimal pressure subunits to `SecondToMinimalPressure`.
				163	for (size_t I = 0; I < NumMinimalSU; ++I) {
				164	getPressureForSubunit(I) = SecondToMinimalPressure;
				165	RemainingPressure -= SecondToMinimalPressure;
				166	}
				167	while (NumMinimalSU < Subunits.size() &&
				168	getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
				169	++NumMinimalSU;
				170	}
				171	}
				172	}
				173
				174	std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
				175	const llvm::MCSchedModel &SM,
				176	llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
				177	// DensePressure[I] is the port pressure for Proc Resource I.
				178	llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
				179	llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A,
				180	const llvm::MCWriteProcResEntry &B) {
				181	return A.ProcResourceIdx < B.ProcResourceIdx;
				182	});
				183	for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
				184	// Get units for the entry.
				185	const llvm::MCProcResourceDesc *const ProcResDesc =
				186	SM.getProcResource(WPR.ProcResourceIdx);
				187	if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
				188	// This is a ProcResUnit.
				189	DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
				190	} else {
				191	// This is a ProcResGroup.
				192	llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
				193	ProcResDesc->SubUnitsIdxBegin +
				194	ProcResDesc->NumUnits);
				195	distributePressure(WPR.Cycles, Subunits, DensePressure);
				196	}
				197	}
				198	// Turn dense pressure into sparse pressure by removing zero entries.
				199	std::vector<std::pair<uint16_t, float>> Pressure;
				200	for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
				201	if (DensePressure[I] > 0.0f)
				202	Pressure.emplace_back(I, DensePressure[I]);
				203	}
				204	return Pressure;
				205	}
				206
				207	ResolvedSchedClass::ResolvedSchedClass(const llvm::MCSubtargetInfo &STI,
				208	unsigned ResolvedSchedClassId,
				209	bool WasVariant)
				210	: SchedClassId(ResolvedSchedClassId),
				211	SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)),
				212	WasVariant(WasVariant),
				213	NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)),
				214	IdealizedProcResPressure(computeIdealizedProcResPressure(
				215	STI.getSchedModel(), NonRedundantWriteProcRes)) {
				216	assert((SCDesc == nullptr \|\| !SCDesc->isVariant()) &&
				217	"ResolvedSchedClass should never be variant");
				218	}
				219
				220	static unsigned ResolveVariantSchedClassId(const llvm::MCSubtargetInfo &STI,
				221	unsigned SchedClassId,
				222	const llvm::MCInst &MCI) {
				223	const auto &SM = STI.getSchedModel();
				224	while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant())
				225	SchedClassId =
				226	STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID());
				227	return SchedClassId;
				228	}
				229
				230	std::pair<unsigned /SchedClassId/, bool /WasVariant/>
				231	ResolvedSchedClass::resolveSchedClassId(
				232	const llvm::MCSubtargetInfo &SubtargetInfo,
				233	const llvm::MCInstrInfo &InstrInfo, const llvm::MCInst &MCI) {
				234	unsigned SchedClassId = InstrInfo.get(MCI.getOpcode()).getSchedClass();
				235	const bool WasVariant = SchedClassId && SubtargetInfo.getSchedModel()
				236	.getSchedClassDesc(SchedClassId)
				237	->isVariant();
				238	SchedClassId = ResolveVariantSchedClassId(SubtargetInfo, SchedClassId, MCI);
				239	return std::make_pair(SchedClassId, WasVariant);
				240	}
				241
Roman Lebedev	4d81e87	2019-03-29 14:58:01 +0000	[diff] [blame^]	242	// Returns a ProxResIdx by id or name.
				243	static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI,
				244	const llvm::StringRef NameOrId) {
				245	// Interpret the key as an ProcResIdx.
				246	unsigned ProcResIdx = 0;
				247	if (llvm::to_integer(NameOrId, ProcResIdx, 10))
				248	return ProcResIdx;
				249	// Interpret the key as a ProcRes name.
				250	const auto &SchedModel = STI.getSchedModel();
				251	for (int I = 0, E = SchedModel.getNumProcResourceKinds(); I < E; ++I) {
				252	if (NameOrId == SchedModel.getProcResource(I)->Name)
				253	return I;
				254	}
				255	return 0;
				256	}
				257
				258	std::vector<BenchmarkMeasure> ResolvedSchedClass::getAsPoint(
				259	InstructionBenchmark::ModeE Mode, const llvm::MCSubtargetInfo &STI,
				260	ArrayRef<PerInstructionStats> Representative) const {
				261	const size_t NumMeasurements = Representative.size();
				262
				263	std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
				264
				265	if (Mode == InstructionBenchmark::Latency) {
				266	assert(NumMeasurements == 1 && "Latency is a single measure.");
				267	BenchmarkMeasure &LatencyMeasure = SchedClassPoint[0];
				268
				269	// Find the latency.
				270	LatencyMeasure.PerInstructionValue = 0.0;
				271
				272	for (unsigned I = 0; I < SCDesc->NumWriteLatencyEntries; ++I) {
				273	const llvm::MCWriteLatencyEntry *const WLE =
				274	STI.getWriteLatencyEntry(SCDesc, I);
				275	LatencyMeasure.PerInstructionValue =
				276	std::max<double>(LatencyMeasure.PerInstructionValue, WLE->Cycles);
				277	}
				278	} else if (Mode == InstructionBenchmark::Uops) {
				279	for (const auto &I : llvm::zip(SchedClassPoint, Representative)) {
				280	BenchmarkMeasure &Measure = std::get<0>(I);
				281	const PerInstructionStats &Stats = std::get<1>(I);
				282
				283	StringRef Key = Stats.key();
				284	uint16_t ProcResIdx = findProcResIdx(STI, Key);
				285	if (ProcResIdx > 0) {
				286	// Find the pressure on ProcResIdx `Key`.
				287	const auto ProcResPressureIt = std::find_if(
				288	IdealizedProcResPressure.begin(), IdealizedProcResPressure.end(),
				289	[ProcResIdx](const std::pair<uint16_t, float> &WPR) {
				290	return WPR.first == ProcResIdx;
				291	});
				292	Measure.PerInstructionValue =
				293	ProcResPressureIt == IdealizedProcResPressure.end()
				294	? 0.0
				295	: ProcResPressureIt->second;
				296	} else if (Key == "NumMicroOps") {
				297	Measure.PerInstructionValue = SCDesc->NumMicroOps;
				298	} else {
				299	llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes "
				300	"name, got "
				301	<< Key << "\n";
				302	return {};
				303	}
				304	}
				305	} else if (Mode == InstructionBenchmark::InverseThroughput) {
				306	assert(NumMeasurements == 1 && "Inverse Throughput is a single measure.");
				307	BenchmarkMeasure &RThroughputMeasure = SchedClassPoint[0];
				308
				309	RThroughputMeasure.PerInstructionValue =
				310	MCSchedModel::getReciprocalThroughput(STI, *SCDesc);
				311	} else {
				312	llvm_unreachable("unimplemented measurement matching mode");
				313	}
				314
				315	return SchedClassPoint;
				316	}
				317
Roman Lebedev	1d1330c	2019-03-29 14:24:27 +0000	[diff] [blame]	318	} // namespace exegesis
				319	} // namespace llvm