Blame - llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp - toolchain/llvm-project

blob: 8476eadbda67f79f0216192eb9678e4dd476f7e1 [file] [log] [blame]

Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	1	//===- BlockFrequencyInfoImpl.cpp - Block Frequency Info Implementation ---===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// Loops should be simplified before this analysis.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	14	#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
				15	#include "llvm/ADT/APFloat.h"
				16	#include "llvm/Support/raw_ostream.h"
				17	#include <deque>
				18
				19	using namespace llvm;
				20
Chandler Carruth	1b9dde0	2014-04-22 02:02:50 +0000	[diff] [blame]	21	#define DEBUG_TYPE "block-freq"
				22
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	23	//===----------------------------------------------------------------------===//
				24	//
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	25	// UnsignedFloat implementation.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	26	//
				27	//===----------------------------------------------------------------------===//
				28	#ifndef _MSC_VER
					// Namespace-scope definitions of the UnsignedFloatBase exponent limits.
					// No initializer is given here, so the values presumably come from the
					// in-class declarations (not visible in this file) -- TODO confirm.
					// NOTE(review): these are compiled out when _MSC_VER is defined; the
					// reason for excluding MSVC is not stated in this file.
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	29	const int32_t UnsignedFloatBase::MaxExponent;
				30	const int32_t UnsignedFloatBase::MinExponent;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	31	#endif
				32
				/// Append the single decimal digit \p D (0-9) to \p Str as an ASCII
				/// character.
				static void appendDigit(std::string &Str, unsigned D) {
				  assert(D < 10);
				  // The modulo keeps the character inside '0'..'9' even when asserts are
				  // compiled out.
				  Str.push_back(static_cast<char>('0' + D % 10));
				}
				37
				38	static void appendNumber(std::string &Str, uint64_t N) {
				39	while (N) {
				40	appendDigit(Str, N % 10);
				41	N /= 10;
				42	}
				43	}
				44
				/// Return true when \p Digit is one of the ASCII characters '5' through '9',
				/// i.e. a first-dropped digit that forces the kept digits to round up.
				static bool doesRoundUp(char Digit) {
				  return Digit >= '5' && Digit <= '9';
				}
				57
				58	static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) {
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	59	assert(E >= UnsignedFloatBase::MinExponent);
				60	assert(E <= UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	61
				62	// Find a new E, but don't let it increase past MaxExponent.
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	63	int LeadingZeros = UnsignedFloatBase::countLeadingZeros64(D);
				64	int NewE = std::min(UnsignedFloatBase::MaxExponent, E + 63 - LeadingZeros);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	65	int Shift = 63 - (NewE - E);
				66	assert(Shift <= LeadingZeros);
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	67	assert(Shift == LeadingZeros \|\| NewE == UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	68	D <<= Shift;
				69	E = NewE;
				70
				71	// Check for a denormal.
				72	unsigned AdjustedE = E + 16383;
				73	if (!(D >> 63)) {
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	74	assert(E == UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	75	AdjustedE = 0;
				76	}
				77
				78	// Build the float and print it.
				79	uint64_t RawBits[2] = {D, AdjustedE};
				80	APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits));
				81	SmallVector<char, 24> Chars;
				82	Float.toString(Chars, Precision, 0);
				83	return std::string(Chars.begin(), Chars.end());
				84	}
				85
				/// Drop trailing '0' characters from \p Float, but keep a single zero after
				/// a decimal point that would otherwise end the string ("2.000" -> "2.0").
				static std::string stripTrailingZeros(const std::string &Float) {
				  const size_t LastKept = Float.find_last_not_of('0');
				  assert(LastKept != std::string::npos && "no . in floating point string");

				  // When the last non-zero character is the '.', retain one digit after it.
				  const size_t Length = Float[LastKept] == '.' ? LastKept + 2 : LastKept + 1;
				  return Float.substr(0, Length);
				}
				95
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	96	std::string UnsignedFloatBase::toString(uint64_t D, int16_t E, int Width,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	97	unsigned Precision) {
					// Render the value D * 2^E as a decimal string.
					//
					// Returns "0.0" when D is zero.  Values that leave both Above0 and Below0
					// empty after canonicalization are delegated to toStringAPFloat().  Width
					// seeds the error term (one unit at bit 64 - Width).  Precision, when
					// non-zero, caps the number of significant digits; the result is rounded
					// on the first dropped digit and always keeps at least one digit after
					// the decimal point.
				98	if (!D)
				99	return "0.0";
				100
				101	// Canonicalize exponent and digits.
					// Above0 holds the integer part, Below0 the first 64 fractional bits and
					// Extra the fractional bits that do not fit in Below0.
				102	uint64_t Above0 = 0;
				103	uint64_t Below0 = 0;
				104	uint64_t Extra = 0;
				105	int ExtraShift = 0;
				106	if (E == 0) {
				107	Above0 = D;
				108	} else if (E > 0) {
					// Shift D left as far as its leading zeros allow; D is the exact integer
					// part only if that consumes the whole exponent.
				109	if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) {
				110	D <<= Shift;
				111	E -= Shift;
				112
				113	if (!E)
				114	Above0 = D;
				115	}
				116	} else if (E > -64) {
				117	Above0 = D >> -E;
				118	Below0 = D << (64 + E);
				119	} else if (E > -120) {
				120	Below0 = D >> (-E - 64);
				121	Extra = D << (128 + E);
				122	ExtraShift = -64 - E;
				123	}
				124
				125	// Fall back on APFloat for very small and very large numbers.
				126	if (!Above0 && !Below0)
				127	return toStringAPFloat(D, E, Precision);
				128
				129	// Append the digits before the decimal.
					// appendNumber() emits the least-significant digit first; the
					// std::reverse below restores reading order.
				130	std::string Str;
				131	size_t DigitsOut = 0;
				132	if (Above0) {
				133	appendNumber(Str, Above0);
				134	DigitsOut = Str.size();
				135	} else
				136	appendDigit(Str, 0);
				137	std::reverse(Str.begin(), Str.end());
				138
				139	// Return early if there's nothing after the decimal.
				140	if (!Below0)
				141	return Str + ".0";
				142
				143	// Append the decimal and beyond.
				144	Str += '.';
				145	uint64_t Error = UINT64_C(1) << (64 - Width);
				146
				147	// We need to shift Below0 to the right to make space for calculating
				148	// digits. Save the precision we're losing in Extra.
				149	Extra = (Below0 & 0xf) << 56 \| (Extra >> 8);
				150	Below0 >>= 4;
				151	size_t SinceDot = 0;
				152	size_t AfterDot = Str.size();
				153	do {
				154	if (ExtraShift) {
				155	--ExtraShift;
				156	Error *= 5;
				157	} else
				158	Error *= 10;
				159
					// Multiply the fraction by ten; the next decimal digit lands in the top
					// four bits of Below0, which are then masked off again.
				160	Below0 *= 10;
				161	Extra *= 10;
				162	Below0 += (Extra >> 60);
				163	Extra = Extra & (UINT64_MAX >> 4);
				164	appendDigit(Str, Below0 >> 60);
				165	Below0 = Below0 & (UINT64_MAX >> 4);
					// DigitsOut counts significant digits only: zeros emitted before the
					// first non-zero digit are not counted.
				166	if (DigitsOut \|\| Str.back() != '0')
				167	++DigitsOut;
				168	++SinceDot;
				169	} while (Error && (Below0 << 4 \| Extra >> 60) >= Error / 2 &&
				170	(!Precision \|\| DigitsOut <= Precision \|\| SinceDot < 2));
				171
				172	// Return early for maximum precision.
				173	if (!Precision \|\| DigitsOut <= Precision)
				174	return stripTrailingZeros(Str);
				175
				176	// Find where to truncate.
				177	size_t Truncate =
				178	std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1);
				179
				180	// Check if there's anything to truncate.
				181	if (Truncate >= Str.size())
				182	return stripTrailingZeros(Str);
				183
				184	bool Carry = doesRoundUp(Str[Truncate]);
				185	if (!Carry)
				186	return stripTrailingZeros(Str.substr(0, Truncate));
				187
				188	// Round with the first truncated digit.
					// Note: the iterator E declared here shadows the exponent parameter E.
				189	for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend();
				190	I != E; ++I) {
				191	if (*I == '.')
				192	continue;
				193	if (*I == '9') {
				194	*I = '0';
				195	continue;
				196	}
				197
				198	++*I;
				199	Carry = false;
				200	break;
				201	}
				202
				203	// Add "1" in front if we still need to carry.
				204	return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate));
				205	}
				206
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	207	raw_ostream &UnsignedFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	208	int Width, unsigned Precision) {
				209	return OS << toString(D, E, Width, Precision);
				210	}
				211
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	212	void UnsignedFloatBase::dump(uint64_t D, int16_t E, int Width) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	213	print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E
				214	<< "]";
				215	}
				216
				/// Build a (digits, exponent) pair from \p N and \p Shift, adding one to the
				/// digits when \p ShouldRound is set.
				///
				/// If the increment wraps \p N around to zero, the result is returned as
				/// (1, Shift + 64) instead.
				static std::pair<uint64_t, int16_t>
				getRoundedFloat(uint64_t N, bool ShouldRound, int64_t Shift) {
				  if (!ShouldRound)
				    return std::make_pair(N, Shift);

				  const uint64_t Rounded = N + 1;
				  if (Rounded == 0)
				    // Rounding caused an overflow.
				    return std::make_pair(UINT64_C(1), Shift + 64);
				  return std::make_pair(Rounded, Shift);
				}
				225
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	226	std::pair<uint64_t, int16_t> UnsignedFloatBase::divide64(uint64_t Dividend,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	227	uint64_t Divisor) {
				228	// Input should be sanitized.
				229	assert(Divisor);
				230	assert(Dividend);
				231
				232	// Minimize size of divisor.
				233	int16_t Shift = 0;
				234	if (int Zeros = countTrailingZeros(Divisor)) {
				235	Shift -= Zeros;
				236	Divisor >>= Zeros;
				237	}
				238
				239	// Check for powers of two.
				240	if (Divisor == 1)
				241	return std::make_pair(Dividend, Shift);
				242
				243	// Maximize size of dividend.
				244	if (int Zeros = countLeadingZeros64(Dividend)) {
				245	Shift -= Zeros;
				246	Dividend <<= Zeros;
				247	}
				248
				249	// Start with the result of a divide.
				250	uint64_t Quotient = Dividend / Divisor;
				251	Dividend %= Divisor;
				252
				253	// Continue building the quotient with long division.
				254	//
				255	// TODO: continue with largers digits.
				256	while (!(Quotient >> 63) && Dividend) {
				257	// Shift Dividend, and check for overflow.
				258	bool IsOverflow = Dividend >> 63;
				259	Dividend <<= 1;
				260	--Shift;
				261
				262	// Divide.
				263	bool DoesDivide = IsOverflow \|\| Divisor <= Dividend;
				264	Quotient = (Quotient << 1) \| uint64_t(DoesDivide);
				265	Dividend -= DoesDivide ? Divisor : 0;
				266	}
				267
				268	// Round.
				269	if (Dividend >= getHalf(Divisor))
				270	if (!++Quotient)
				271	// Rounding caused an overflow in Quotient.
				272	return std::make_pair(UINT64_C(1), Shift + 64);
				273
				274	return getRoundedFloat(Quotient, Dividend >= getHalf(Divisor), Shift);
				275	}
				276
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	277	std::pair<uint64_t, int16_t> UnsignedFloatBase::multiply64(uint64_t L,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	278	uint64_t R) {
				279	// Separate into two 32-bit digits (U.L).
				280	uint64_t UL = L >> 32, LL = L & UINT32_MAX, UR = R >> 32, LR = R & UINT32_MAX;
				281
				282	// Compute cross products.
				283	uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR;
				284
				285	// Sum into two 64-bit digits.
				286	uint64_t Upper = P1, Lower = P4;
				287	auto addWithCarry = [&](uint64_t N) {
				288	uint64_t NewLower = Lower + (N << 32);
				289	Upper += (N >> 32) + (NewLower < Lower);
				290	Lower = NewLower;
				291	};
				292	addWithCarry(P2);
				293	addWithCarry(P3);
				294
				295	// Check whether the upper digit is empty.
				296	if (!Upper)
				297	return std::make_pair(Lower, 0);
				298
				299	// Shift as little as possible to maximize precision.
				300	unsigned LeadingZeros = countLeadingZeros64(Upper);
				301	int16_t Shift = 64 - LeadingZeros;
				302	if (LeadingZeros)
				303	Upper = Upper << LeadingZeros \| Lower >> Shift;
				304	bool ShouldRound = Shift && (Lower & UINT64_C(1) << (Shift - 1));
				305	return getRoundedFloat(Upper, ShouldRound, Shift);
				306	}
				307
				308	//===----------------------------------------------------------------------===//
				309	//
				310	// BlockMass implementation.
				311	//
				312	//===----------------------------------------------------------------------===//
				313	BlockMass &BlockMass::operator*=(const BranchProbability &P) {
				314	uint32_t N = P.getNumerator(), D = P.getDenominator();
				315	assert(D && "divide by 0");
				316	assert(N <= D && "fraction greater than 1");
				317
				318	// Fast path for multiplying by 1.0.
				319	if (!Mass \|\| N == D)
				320	return *this;
				321
				322	// Get as much precision as we can.
				323	int Shift = countLeadingZeros(Mass);
				324	uint64_t ShiftedQuotient = (Mass << Shift) / D;
				325	uint64_t Product = ShiftedQuotient * N >> Shift;
				326
				327	// Now check for what's lost.
				328	uint64_t Left = ShiftedQuotient * (D - N) >> Shift;
				329	uint64_t Lost = Mass - Product - Left;
				330
				331	// TODO: prove this assertion.
				332	assert(Lost <= UINT32_MAX);
				333
				334	// Take the product plus a portion of the spoils.
				335	Mass = Product + Lost * N / D;
				336	return *this;
				337	}
				338
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	339	UnsignedFloat<uint64_t> BlockMass::toFloat() const {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	340	if (isFull())
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	341	return UnsignedFloat<uint64_t>(1, 0);
				342	return UnsignedFloat<uint64_t>(getMass() + 1, -64);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	343	}
				344
				345	void BlockMass::dump() const { print(dbgs()); }
				346
				/// Return the lowercase hexadecimal character for \p N, which must be less
				/// than 16.
				static char getHexDigit(int N) {
				  assert(N < 16);
				  // Values below ten map onto '0'..'9'; the rest onto 'a'..'f'.
				  const int Base = N < 10 ? '0' : 'a' - 10;
				  return Base + N;
				}
				353	raw_ostream &BlockMass::print(raw_ostream &OS) const {
				354	for (int Digits = 0; Digits < 16; ++Digits)
				355	OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
				356	return OS;
				357	}
				358
				359	//===----------------------------------------------------------------------===//
				360	//
				361	// BlockFrequencyInfoImpl implementation.
				362	//
				363	//===----------------------------------------------------------------------===//
				364	namespace {
				365
				366	typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
				367	typedef BlockFrequencyInfoImplBase::Distribution Distribution;
				368	typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
				369	typedef BlockFrequencyInfoImplBase::Float Float;
Duncan P. N. Exon Smith	cc88ebf	2014-04-22 03:31:31 +0000	[diff] [blame]	370	typedef BlockFrequencyInfoImplBase::LoopData LoopData;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	371	typedef BlockFrequencyInfoImplBase::Weight Weight;
				372	typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
				373
				374	/// \brief Dithering mass distributer.
				375	///
				376	/// This class splits up a single mass into portions by weight, dithering to
				377	/// spread out error. No mass is lost. The dithering precision depends on the
				378	/// precision of the product of \a BlockMass and \a BranchProbability.
				379	///
				380	/// The distribution algorithm follows.
				381	///
				382	/// 1. Initialize by saving the sum of the weights in \a RemWeight and the
				383	/// mass to distribute in \a RemMass.
				384	///
				385	/// 2. For each portion:
				386	///
				387	/// 1. Construct a branch probability, P, as the portion's weight divided
				388	/// by the current value of \a RemWeight.
				389	/// 2. Calculate the portion's mass as \a RemMass times P.
				390	/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
				391	/// the current portion's weight and mass.
				392	///
				393	/// Mass is distributed in two ways: full distribution and forward
				394	/// distribution. The latter ignores backedges, and uses the parallel fields
				395	/// \a RemForwardWeight and \a RemForwardMass.
				396	struct DitheringDistributer {
				397	uint32_t RemWeight;
				398	uint32_t RemForwardWeight;
				399
				400	BlockMass RemMass;
				401	BlockMass RemForwardMass;
				402
				403	DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
				404
				405	BlockMass takeLocalMass(uint32_t Weight) {
				406	(void)takeMass(Weight);
				407	return takeForwardMass(Weight);
				408	}
				409	BlockMass takeExitMass(uint32_t Weight) {
				410	(void)takeForwardMass(Weight);
				411	return takeMass(Weight);
				412	}
				413	BlockMass takeBackedgeMass(uint32_t Weight) { return takeMass(Weight); }
				414
				415	private:
				416	BlockMass takeForwardMass(uint32_t Weight);
				417	BlockMass takeMass(uint32_t Weight);
				418	};
				419	}
				420
				421	DitheringDistributer::DitheringDistributer(Distribution &Dist,
				422	const BlockMass &Mass) {
				423	Dist.normalize();
				424	RemWeight = Dist.Total;
				425	RemForwardWeight = Dist.ForwardTotal;
				426	RemMass = Mass;
				427	RemForwardMass = Dist.ForwardTotal ? Mass : BlockMass();
				428	}
				429
				430	BlockMass DitheringDistributer::takeForwardMass(uint32_t Weight) {
				431	// Compute the amount of mass to take.
				432	assert(Weight && "invalid weight");
				433	assert(Weight <= RemForwardWeight);
				434	BlockMass Mass = RemForwardMass * BranchProbability(Weight, RemForwardWeight);
				435
				436	// Decrement totals (dither).
				437	RemForwardWeight -= Weight;
				438	RemForwardMass -= Mass;
				439	return Mass;
				440	}
				441	BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
				442	assert(Weight && "invalid weight");
				443	assert(Weight <= RemWeight);
				444	BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
				445
				446	// Decrement totals (dither).
				447	RemWeight -= Weight;
				448	RemMass -= Mass;
				449	return Mass;
				450	}
				451
				452	void Distribution::add(const BlockNode &Node, uint64_t Amount,
				453	Weight::DistType Type) {
				454	assert(Amount && "invalid weight of 0");
				455	uint64_t NewTotal = Total + Amount;
				456
				457	// Check for overflow. It should be impossible to overflow twice.
				458	bool IsOverflow = NewTotal < Total;
				459	assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
				460	DidOverflow \|= IsOverflow;
				461
				462	// Update the total.
				463	Total = NewTotal;
				464
				465	// Save the weight.
				466	Weight W;
				467	W.TargetNode = Node;
				468	W.Amount = Amount;
				469	W.Type = Type;
				470	Weights.push_back(W);
				471
				472	if (Type == Weight::Backedge)
				473	return;
				474
				475	// Update forward total. Don't worry about overflow here, since then Total
				476	// will exceed 32-bits and they'll both be recomputed in normalize().
				477	ForwardTotal += Amount;
				478	}
				479
				480	static void combineWeight(Weight &W, const Weight &OtherW) {
				481	assert(OtherW.TargetNode.isValid());
				482	if (!W.Amount) {
				483	W = OtherW;
				484	return;
				485	}
				486	assert(W.Type == OtherW.Type);
				487	assert(W.TargetNode == OtherW.TargetNode);
				488	assert(W.Amount < W.Amount + OtherW.Amount);
				489	W.Amount += OtherW.Amount;
				490	}
				491	static void combineWeightsBySorting(WeightList &Weights) {
				492	// Sort so edges to the same node are adjacent.
				493	std::sort(Weights.begin(), Weights.end(),
				494	[](const Weight &L,
				495	const Weight &R) { return L.TargetNode < R.TargetNode; });
				496
				497	// Combine adjacent edges.
				498	WeightList::iterator O = Weights.begin();
				499	for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
				500	++O, (I = L)) {
				501	O = I;
				502
				503	// Find the adjacent weights to the same node.
				504	for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
				505	combineWeight(O, L);
				506	}
				507
				508	// Erase extra entries.
				509	Weights.erase(O, Weights.end());
				510	return;
				511	}
				512	static void combineWeightsByHashing(WeightList &Weights) {
				513	// Collect weights into a DenseMap.
				514	typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
				515	HashTable Combined(NextPowerOf2(2 * Weights.size()));
				516	for (const Weight &W : Weights)
				517	combineWeight(Combined[W.TargetNode.Index], W);
				518
				519	// Check whether anything changed.
				520	if (Weights.size() == Combined.size())
				521	return;
				522
				523	// Fill in the new weights.
				524	Weights.clear();
				525	Weights.reserve(Combined.size());
				526	for (const auto &I : Combined)
				527	Weights.push_back(I.second);
				528	}
				529	static void combineWeights(WeightList &Weights) {
				530	// Use a hash table for many successors to keep this linear.
				531	if (Weights.size() > 128) {
				532	combineWeightsByHashing(Weights);
				533	return;
				534	}
				535
				536	combineWeightsBySorting(Weights);
				537	}
				/// Shift \p N right by \p Shift bits (0 <= Shift < 64), rounding on the
				/// highest bit that is shifted out.
				static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
				  assert(Shift >= 0);
				  assert(Shift < 64);
				  if (Shift == 0)
				    return N;

				  // The last bit dropped decides whether to round up.
				  const uint64_t RoundBit = (N >> (Shift - 1)) & UINT64_C(1);
				  return (N >> Shift) + RoundBit;
				}
				545	void Distribution::normalize() {
				546	// Early exit for termination nodes.
				547	if (Weights.empty())
				548	return;
				549
				550	// Only bother if there are multiple successors.
				551	if (Weights.size() > 1)
				552	combineWeights(Weights);
				553
				554	// Early exit when combined into a single successor.
				555	if (Weights.size() == 1) {
				556	Total = 1;
				557	ForwardTotal = Weights.front().Type != Weight::Backedge;
				558	Weights.front().Amount = 1;
				559	return;
				560	}
				561
				562	// Determine how much to shift right so that the total fits into 32-bits.
				563	//
				564	// If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
				565	// for each weight can cause a 32-bit overflow.
				566	int Shift = 0;
				567	if (DidOverflow)
				568	Shift = 33;
				569	else if (Total > UINT32_MAX)
				570	Shift = 33 - countLeadingZeros(Total);
				571
				572	// Early exit if nothing needs to be scaled.
				573	if (!Shift)
				574	return;
				575
				576	// Recompute the total through accumulation (rather than shifting it) so that
				577	// it's accurate after shifting. ForwardTotal is dirty here anyway.
				578	Total = 0;
				579	ForwardTotal = 0;
				580
				581	// Sum the weights to each node and shift right if necessary.
				582	for (Weight &W : Weights) {
				583	// Scale down below UINT32_MAX. Since Shift is larger than necessary, we
				584	// can round here without concern about overflow.
				585	assert(W.TargetNode.isValid());
				586	W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
				587	assert(W.Amount <= UINT32_MAX);
				588
				589	// Update the total.
				590	Total += W.Amount;
				591	if (W.Type == Weight::Backedge)
				592	continue;
				593
				594	// Update the forward total.
				595	ForwardTotal += W.Amount;
				596	}
				597	assert(Total <= UINT32_MAX);
				598	}
				599
				600	void BlockFrequencyInfoImplBase::clear() {
Duncan P. N. Exon Smith	dc2d66e	2014-04-22 03:31:34 +0000	[diff] [blame]	601	// Swap with a default-constructed std::vector, since std::vector<>::clear()
				602	// does not actually clear heap storage.
				603	std::vector<FrequencyData>().swap(Freqs);
				604	std::vector<WorkingData>().swap(Working);
Duncan P. N. Exon Smith	fc7dc93	2014-04-25 04:30:06 +0000	[diff] [blame]	605	Loops.clear();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	606	}
				607
				608	/// \brief Clear all memory not needed downstream.
				609	///
				610	/// Releases all memory not used downstream. In particular, saves Freqs.
				611	static void cleanup(BlockFrequencyInfoImplBase &BFI) {
				612	std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
				613	BFI.clear();
				614	BFI.Freqs = std::move(SavedFreqs);
				615	}
				616
				617	/// \brief Get a possibly packaged node.
				618	///
				619	/// Get the node currently representing Node, which could be a containing
				620	/// loop.
				621	///
				622	/// This function should only be called when distributing mass. As long as
				623	/// there are no irreducilbe edges to Node, then it will have complexity O(1)
				624	/// in this context.
				625	///
				626	/// In general, the complexity is O(L), where L is the number of loop headers
				627	/// Node has been packaged into. Since this method is called in the context
				628	/// of distributing mass, L will be the number of loop headers an early exit
				629	/// edge jumps out of.
				630	static BlockNode getPackagedNode(const BlockFrequencyInfoImplBase &BFI,
				631	const BlockNode &Node) {
				632	assert(Node.isValid());
Duncan P. N. Exon Smith	2984a64	2014-04-22 03:31:44 +0000	[diff] [blame]	633	if (!BFI.Working[Node.Index].isPackaged())
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	634	return Node;
Duncan P. N. Exon Smith	2984a64	2014-04-22 03:31:44 +0000	[diff] [blame]	635	if (!BFI.Working[Node.Index].isAPackage())
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	636	return Node;
Duncan P. N. Exon Smith	2984a64	2014-04-22 03:31:44 +0000	[diff] [blame]	637	return getPackagedNode(BFI, BFI.Working[Node.Index].getContainingHeader());
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	638	}
				639
				640	/// \brief Get the appropriate mass for a possible pseudo-node loop package.
				641	///
				642	/// Get appropriate mass for Node. If Node is a loop-header (whose loop has
				643	/// been packaged), returns the mass of its pseudo-node. If it's a node inside
				644	/// a packaged loop, it returns the loop's pseudo-node.
				645	static BlockMass &getPackageMass(BlockFrequencyInfoImplBase &BFI,
				646	const BlockNode &Node) {
				647	assert(Node.isValid());
Duncan P. N. Exon Smith	2984a64	2014-04-22 03:31:44 +0000	[diff] [blame]	648	assert(!BFI.Working[Node.Index].isPackaged());
Duncan P. N. Exon Smith	e142363	2014-04-22 03:31:37 +0000	[diff] [blame]	649	if (!BFI.Working[Node.Index].isAPackage())
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	650	return BFI.Working[Node.Index].Mass;
				651
				652	return BFI.getLoopPackage(Node).Mass;
				653	}
				654
				655	void BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	656	const LoopData *OuterLoop,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	657	const BlockNode &Pred,
				658	const BlockNode &Succ,
				659	uint64_t Weight) {
				660	if (!Weight)
				661	Weight = 1;
				662
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	663	BlockNode LoopHead;
				664	if (OuterLoop)
				665	LoopHead = OuterLoop->Header;
				666
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	667	#ifndef NDEBUG
				668	auto debugSuccessor = [&](const char *Type, const BlockNode &Resolved) {
				669	dbgs() << " =>"
				670	<< " [" << Type << "] weight = " << Weight;
				671	if (Succ != LoopHead)
				672	dbgs() << ", succ = " << getBlockName(Succ);
				673	if (Resolved != Succ)
				674	dbgs() << ", resolved = " << getBlockName(Resolved);
				675	dbgs() << "\n";
				676	};
				677	(void)debugSuccessor;
				678	#endif
				679
				680	if (Succ == LoopHead) {
				681	DEBUG(debugSuccessor("backedge", Succ));
				682	Dist.addBackedge(LoopHead, Weight);
				683	return;
				684	}
				685	BlockNode Resolved = getPackagedNode(*this, Succ);
				686	assert(Resolved != LoopHead);
				687
Duncan P. N. Exon Smith	2984a64	2014-04-22 03:31:44 +0000	[diff] [blame]	688	if (Working[Resolved.Index].getContainingHeader() != LoopHead) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	689	DEBUG(debugSuccessor(" exit ", Resolved));
				690	Dist.addExit(Resolved, Weight);
				691	return;
				692	}
				693
Duncan P. N. Exon Smith	b3380ea	2014-04-22 03:31:53 +0000	[diff] [blame]	694	if (Resolved < Pred) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	695	// Irreducible backedge. Skip this edge in the distribution.
				696	DEBUG(debugSuccessor("skipped ", Resolved));
				697	return;
				698	}
				699
				700	DEBUG(debugSuccessor(" local ", Resolved));
				701	Dist.addLocal(Resolved, Weight);
				702	}
				703
				704	void BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	705	const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	706	// Copy the exit map into Dist.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	707	for (const auto &I : Loop.Exits)
				708	addToDist(Dist, OuterLoop, Loop.Header, I.first, I.second.getMass());
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	709
				710	// We don't need this map any more. Clear it to prevent quadratic memory
				711	// usage in deeply nested loops with irreducible control flow.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	712	Loop.Exits.clear();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	713	}
				714
				715	/// \brief Get the maximum allowed loop scale.
				716	///
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	717	/// Gives the maximum number of estimated iterations allowed for a loop. Very
				718	/// large numbers cause problems downstream (even within 64-bits).
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	719	static Float getMaxLoopScale() { return Float(1, 12); }
				720
				721	/// \brief Compute the loop scale for a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	722	void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	723	// Compute loop scale.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	724	DEBUG(dbgs() << "compute-loop-scale: " << getBlockName(Loop.Header) << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	725
				726	// LoopScale == 1 / ExitMass
				727	// ExitMass == HeadMass - BackedgeMass
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	728	BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	729
				730	// Block scale stores the inverse of the scale.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	731	Loop.Scale = ExitMass.toFloat().inverse();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	732
				733	DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	734	<< " - " << Loop.BackedgeMass << ")\n"
				735	<< " - scale = " << Loop.Scale << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	736
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	737	if (Loop.Scale > getMaxLoopScale()) {
				738	Loop.Scale = getMaxLoopScale();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	739	DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
				740	}
				741	}
				742
				743	/// \brief Package up a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	744	void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
				745	DEBUG(dbgs() << "packaging-loop: " << getBlockName(Loop.Header) << "\n");
				746	Loop.IsPackaged = true;
Duncan P. N. Exon Smith	2984a64	2014-04-22 03:31:44 +0000	[diff] [blame]	747	DEBUG(for (const BlockNode &M
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	748	: Loop.Members) {
Duncan P. N. Exon Smith	2984a64	2014-04-22 03:31:44 +0000	[diff] [blame]	749	dbgs() << " - node: " << getBlockName(M.Index) << "\n";
				750	});
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	751	}
				752
				753	void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	754	LoopData *OuterLoop,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	755	Distribution &Dist) {
				756	BlockMass Mass = getPackageMass(*this, Source);
				757	DEBUG(dbgs() << " => mass: " << Mass
				758	<< " ( general \| forward )\n");
				759
				760	// Distribute mass to successors as laid out in Dist.
				761	DitheringDistributer D(Dist, Mass);
				762
				763	#ifndef NDEBUG
				764	auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
				765	const char *Desc) {
				766	dbgs() << " => assign " << M << " (" << D.RemMass << "\|"
				767	<< D.RemForwardMass << ")";
				768	if (Desc)
				769	dbgs() << " [" << Desc << "]";
				770	if (T.isValid())
				771	dbgs() << " to " << getBlockName(T);
				772	dbgs() << "\n";
				773	};
				774	(void)debugAssign;
				775	#endif
				776
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	777	BlockNode LoopHead;
				778	if (OuterLoop)
				779	LoopHead = OuterLoop->Header;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	780	for (const Weight &W : Dist.Weights) {
				781	// Check for a local edge (forward and non-exit).
				782	if (W.Type == Weight::Local) {
				783	BlockMass Local = D.takeLocalMass(W.Amount);
				784	getPackageMass(*this, W.TargetNode) += Local;
				785	DEBUG(debugAssign(W.TargetNode, Local, nullptr));
				786	continue;
				787	}
				788
				789	// Backedges and exits only make sense if we're processing a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	790	assert(OuterLoop && "backedge or exit outside of loop");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	791
				792	// Check for a backedge.
				793	if (W.Type == Weight::Backedge) {
				794	BlockMass Back = D.takeBackedgeMass(W.Amount);
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	795	OuterLoop->BackedgeMass += Back;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	796	DEBUG(debugAssign(BlockNode(), Back, "back"));
				797	continue;
				798	}
				799
				800	// This must be an exit.
				801	assert(W.Type == Weight::Exit);
				802	BlockMass Exit = D.takeExitMass(W.Amount);
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame^]	803	OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Exit));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	804	DEBUG(debugAssign(W.TargetNode, Exit, "exit"));
				805	}
				806	}
				807
				808	static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
				809	const Float &Min, const Float &Max) {
				810	// Scale the Factor to a size that creates integers. Ideally, integers would
				811	// be scaled so that Max == UINT64_MAX so that they can be best
				812	// differentiated. However, the register allocator currently deals poorly
				813	// with large numbers. Instead, push Min up a little from 1 to give some
				814	// room to differentiate small, unequal numbers.
				815	//
				816	// TODO: fix issues downstream so that ScalingFactor can be Float(1,64)/Max.
				817	Float ScalingFactor = Min.inverse();
				818	if ((Max / Min).lg() < 60)
				819	ScalingFactor <<= 3;
				820
				821	// Translate the floats to integers.
				822	DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
				823	<< ", factor = " << ScalingFactor << "\n");
				824	for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
				825	Float Scaled = BFI.Freqs[Index].Floating * ScalingFactor;
				826	BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
				827	DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
				828	<< BFI.Freqs[Index].Floating << ", scaled = " << Scaled
				829	<< ", int = " << BFI.Freqs[Index].Integer << "\n");
				830	}
				831	}
				832
				833	static void scaleBlockData(BlockFrequencyInfoImplBase &BFI,
				834	const BlockNode &Node,
Duncan P. N. Exon Smith	cc88ebf	2014-04-22 03:31:31 +0000	[diff] [blame]	835	const LoopData &Loop) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	836	Float F = Loop.Mass.toFloat() * Loop.Scale;
				837
				838	Float &Current = BFI.Freqs[Node.Index].Floating;
				839	Float Updated = Current * F;
				840
				841	DEBUG(dbgs() << " - " << BFI.getBlockName(Node) << ": " << Current << " => "
				842	<< Updated << "\n");
				843
				844	Current = Updated;
				845	}
				846
				847	/// \brief Unwrap a loop package.
				848	///
				849	/// Visits all the members of a loop, adjusting their BlockData according to
				850	/// the loop's pseudo-node.
				851	static void unwrapLoopPackage(BlockFrequencyInfoImplBase &BFI,
				852	const BlockNode &Head) {
				853	assert(Head.isValid());
				854
Duncan P. N. Exon Smith	cc88ebf	2014-04-22 03:31:31 +0000	[diff] [blame]	855	LoopData &LoopPackage = BFI.getLoopPackage(Head);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	856	DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getBlockName(Head)
				857	<< ": mass = " << LoopPackage.Mass
				858	<< ", scale = " << LoopPackage.Scale << "\n");
				859	scaleBlockData(BFI, Head, LoopPackage);
				860
				861	// Propagate the head scale through the loop. Since members are visited in
				862	// RPO, the head scale will be updated by the loop scale first, and then the
				863	// final head scale will be used for updated the rest of the members.
				864	for (const BlockNode &M : LoopPackage.Members) {
				865	const FrequencyData &HeadData = BFI.Freqs[Head.Index];
				866	FrequencyData &Freqs = BFI.Freqs[M.Index];
				867	Float NewFreq = Freqs.Floating * HeadData.Floating;
				868	DEBUG(dbgs() << " - " << BFI.getBlockName(M) << ": " << Freqs.Floating
				869	<< " => " << NewFreq << "\n");
				870	Freqs.Floating = NewFreq;
				871	}
				872	}
				873
				874	void BlockFrequencyInfoImplBase::finalizeMetrics() {
				875	// Set initial frequencies from loop-local masses.
				876	for (size_t Index = 0; Index < Working.size(); ++Index)
				877	Freqs[Index].Floating = Working[Index].Mass.toFloat();
				878
				879	// Unwrap loop packages in reverse post-order, tracking min and max
				880	// frequencies.
				881	auto Min = Float::getLargest();
				882	auto Max = Float::getZero();
				883	for (size_t Index = 0; Index < Working.size(); ++Index) {
				884	if (Working[Index].isLoopHeader())
				885	unwrapLoopPackage(*this, BlockNode(Index));
				886
				887	// Update max scale.
				888	Min = std::min(Min, Freqs[Index].Floating);
				889	Max = std::max(Max, Freqs[Index].Floating);
				890	}
				891
				892	// Convert to integers.
				893	convertFloatingToInteger(*this, Min, Max);
				894
				895	// Clean up data structures.
				896	cleanup(*this);
				897
				898	// Print out the final stats.
				899	DEBUG(dump());
				900	}
				901
				902	BlockFrequency
				903	BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
				904	if (!Node.isValid())
				905	return 0;
				906	return Freqs[Node.Index].Integer;
				907	}
				908	Float
				909	BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
				910	if (!Node.isValid())
				911	return Float::getZero();
				912	return Freqs[Node.Index].Floating;
				913	}
				914
				915	std::string
				916	BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
				917	return std::string();
				918	}
				919
				920	raw_ostream &
				921	BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
				922	const BlockNode &Node) const {
				923	return OS << getFloatingBlockFreq(Node);
				924	}
				925
				926	raw_ostream &
				927	BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
				928	const BlockFrequency &Freq) const {
				929	Float Block(Freq.getFrequency(), 0);
				930	Float Entry(getEntryFreq(), 0);
				931
				932	return OS << Block / Entry;
				933	}