Blame - llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp - toolchain/llvm-project

blob: e953b43e37559d8d4392a7ffa92841635060eca0 [file] [log] [blame]

Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	1	//===- BlockFrequencyInfoImpl.cpp - Block Frequency Info Implementation ---===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// Loops should be simplified before this analysis.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	14	#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
				15	#include "llvm/ADT/APFloat.h"
				16	#include "llvm/Support/raw_ostream.h"
				17	#include <deque>
				18
				19	using namespace llvm;
				20
Chandler Carruth	1b9dde0	2014-04-22 02:02:50 +0000	[diff] [blame]	21	#define DEBUG_TYPE "block-freq"
				22
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	23	//===----------------------------------------------------------------------===//
				24	//
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	25	// UnsignedFloat implementation.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	26	//
				27	//===----------------------------------------------------------------------===//
					// Namespace-scope definitions of the two static constant members of
					// UnsignedFloatBase, compiled only when _MSC_VER is not defined.
					// NOTE(review): presumably MSVC rejects or does not need these
					// redundant definitions -- confirm before changing the guard.
				28	#ifndef _MSC_VER
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	29	const int32_t UnsignedFloatBase::MaxExponent;
				30	const int32_t UnsignedFloatBase::MinExponent;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	31	#endif
				32
				33	static void appendDigit(std::string &Str, unsigned D) {
				34	assert(D < 10);
				35	Str += '0' + D % 10;
				36	}
				37
				38	static void appendNumber(std::string &Str, uint64_t N) {
				39	while (N) {
				40	appendDigit(Str, N % 10);
				41	N /= 10;
				42	}
				43	}
				44
				45	static bool doesRoundUp(char Digit) {
				46	switch (Digit) {
				47	case '5':
				48	case '6':
				49	case '7':
				50	case '8':
				51	case '9':
				52	return true;
				53	default:
				54	return false;
				55	}
				56	}
				57
				58	static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) {
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	59	assert(E >= UnsignedFloatBase::MinExponent);
				60	assert(E <= UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	61
				62	// Find a new E, but don't let it increase past MaxExponent.
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	63	int LeadingZeros = UnsignedFloatBase::countLeadingZeros64(D);
				64	int NewE = std::min(UnsignedFloatBase::MaxExponent, E + 63 - LeadingZeros);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	65	int Shift = 63 - (NewE - E);
				66	assert(Shift <= LeadingZeros);
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	67	assert(Shift == LeadingZeros \|\| NewE == UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	68	D <<= Shift;
				69	E = NewE;
				70
				71	// Check for a denormal.
				72	unsigned AdjustedE = E + 16383;
				73	if (!(D >> 63)) {
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	74	assert(E == UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	75	AdjustedE = 0;
				76	}
				77
				78	// Build the float and print it.
				79	uint64_t RawBits[2] = {D, AdjustedE};
				80	APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits));
				81	SmallVector<char, 24> Chars;
				82	Float.toString(Chars, Precision, 0);
				83	return std::string(Chars.begin(), Chars.end());
				84	}
				85
				86	static std::string stripTrailingZeros(const std::string &Float) {
				87	size_t NonZero = Float.find_last_not_of('0');
				88	assert(NonZero != std::string::npos && "no . in floating point string");
				89
				90	if (Float[NonZero] == '.')
				91	++NonZero;
				92
				93	return Float.substr(0, NonZero + 1);
				94	}
				95
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	96	std::string UnsignedFloatBase::toString(uint64_t D, int16_t E, int Width,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	97	unsigned Precision) {
				98	if (!D)
				99	return "0.0";
				100
				101	// Canonicalize exponent and digits.
				102	uint64_t Above0 = 0;
				103	uint64_t Below0 = 0;
				104	uint64_t Extra = 0;
				105	int ExtraShift = 0;
				106	if (E == 0) {
				107	Above0 = D;
				108	} else if (E > 0) {
				109	if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) {
				110	D <<= Shift;
				111	E -= Shift;
				112
				113	if (!E)
				114	Above0 = D;
				115	}
				116	} else if (E > -64) {
				117	Above0 = D >> -E;
				118	Below0 = D << (64 + E);
				119	} else if (E > -120) {
				120	Below0 = D >> (-E - 64);
				121	Extra = D << (128 + E);
				122	ExtraShift = -64 - E;
				123	}
				124
				125	// Fall back on APFloat for very small and very large numbers.
				126	if (!Above0 && !Below0)
				127	return toStringAPFloat(D, E, Precision);
				128
				129	// Append the digits before the decimal.
				130	std::string Str;
				131	size_t DigitsOut = 0;
				132	if (Above0) {
				133	appendNumber(Str, Above0);
				134	DigitsOut = Str.size();
				135	} else
				136	appendDigit(Str, 0);
				137	std::reverse(Str.begin(), Str.end());
				138
				139	// Return early if there's nothing after the decimal.
				140	if (!Below0)
				141	return Str + ".0";
				142
				143	// Append the decimal and beyond.
				144	Str += '.';
				145	uint64_t Error = UINT64_C(1) << (64 - Width);
				146
				147	// We need to shift Below0 to the right to make space for calculating
				148	// digits. Save the precision we're losing in Extra.
				149	Extra = (Below0 & 0xf) << 56 \| (Extra >> 8);
				150	Below0 >>= 4;
				151	size_t SinceDot = 0;
				152	size_t AfterDot = Str.size();
				153	do {
				154	if (ExtraShift) {
				155	--ExtraShift;
				156	Error *= 5;
				157	} else
				158	Error *= 10;
				159
				160	Below0 *= 10;
				161	Extra *= 10;
				162	Below0 += (Extra >> 60);
				163	Extra = Extra & (UINT64_MAX >> 4);
				164	appendDigit(Str, Below0 >> 60);
				165	Below0 = Below0 & (UINT64_MAX >> 4);
				166	if (DigitsOut \|\| Str.back() != '0')
				167	++DigitsOut;
				168	++SinceDot;
				169	} while (Error && (Below0 << 4 \| Extra >> 60) >= Error / 2 &&
				170	(!Precision \|\| DigitsOut <= Precision \|\| SinceDot < 2));
				171
				172	// Return early for maximum precision.
				173	if (!Precision \|\| DigitsOut <= Precision)
				174	return stripTrailingZeros(Str);
				175
				176	// Find where to truncate.
				177	size_t Truncate =
				178	std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1);
				179
				180	// Check if there's anything to truncate.
				181	if (Truncate >= Str.size())
				182	return stripTrailingZeros(Str);
				183
				184	bool Carry = doesRoundUp(Str[Truncate]);
				185	if (!Carry)
				186	return stripTrailingZeros(Str.substr(0, Truncate));
				187
				188	// Round with the first truncated digit.
				189	for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend();
				190	I != E; ++I) {
				191	if (*I == '.')
				192	continue;
				193	if (*I == '9') {
				194	*I = '0';
				195	continue;
				196	}
				197
				198	++*I;
				199	Carry = false;
				200	break;
				201	}
				202
				203	// Add "1" in front if we still need to carry.
				204	return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate));
				205	}
				206
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	207	raw_ostream &UnsignedFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	208	int Width, unsigned Precision) {
				209	return OS << toString(D, E, Width, Precision);
				210	}
				211
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	212	void UnsignedFloatBase::dump(uint64_t D, int16_t E, int Width) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	213	print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E
				214	<< "]";
				215	}
				216
				217	static std::pair<uint64_t, int16_t>
				218	getRoundedFloat(uint64_t N, bool ShouldRound, int64_t Shift) {
				219	if (ShouldRound)
				220	if (!++N)
				221	// Rounding caused an overflow.
				222	return std::make_pair(UINT64_C(1), Shift + 64);
				223	return std::make_pair(N, Shift);
				224	}
				225
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	226	std::pair<uint64_t, int16_t> UnsignedFloatBase::divide64(uint64_t Dividend,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	227	uint64_t Divisor) {
				228	// Input should be sanitized.
				229	assert(Divisor);
				230	assert(Dividend);
				231
				232	// Minimize size of divisor.
				233	int16_t Shift = 0;
				234	if (int Zeros = countTrailingZeros(Divisor)) {
				235	Shift -= Zeros;
				236	Divisor >>= Zeros;
				237	}
				238
				239	// Check for powers of two.
				240	if (Divisor == 1)
				241	return std::make_pair(Dividend, Shift);
				242
				243	// Maximize size of dividend.
				244	if (int Zeros = countLeadingZeros64(Dividend)) {
				245	Shift -= Zeros;
				246	Dividend <<= Zeros;
				247	}
				248
				249	// Start with the result of a divide.
				250	uint64_t Quotient = Dividend / Divisor;
				251	Dividend %= Divisor;
				252
				253	// Continue building the quotient with long division.
				254	//
				255	// TODO: continue with largers digits.
				256	while (!(Quotient >> 63) && Dividend) {
				257	// Shift Dividend, and check for overflow.
				258	bool IsOverflow = Dividend >> 63;
				259	Dividend <<= 1;
				260	--Shift;
				261
				262	// Divide.
				263	bool DoesDivide = IsOverflow \|\| Divisor <= Dividend;
				264	Quotient = (Quotient << 1) \| uint64_t(DoesDivide);
				265	Dividend -= DoesDivide ? Divisor : 0;
				266	}
				267
				268	// Round.
				269	if (Dividend >= getHalf(Divisor))
				270	if (!++Quotient)
				271	// Rounding caused an overflow in Quotient.
				272	return std::make_pair(UINT64_C(1), Shift + 64);
				273
				274	return getRoundedFloat(Quotient, Dividend >= getHalf(Divisor), Shift);
				275	}
				276
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	277	std::pair<uint64_t, int16_t> UnsignedFloatBase::multiply64(uint64_t L,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	278	uint64_t R) {
				279	// Separate into two 32-bit digits (U.L).
				280	uint64_t UL = L >> 32, LL = L & UINT32_MAX, UR = R >> 32, LR = R & UINT32_MAX;
				281
				282	// Compute cross products.
				283	uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR;
				284
				285	// Sum into two 64-bit digits.
				286	uint64_t Upper = P1, Lower = P4;
				287	auto addWithCarry = [&](uint64_t N) {
				288	uint64_t NewLower = Lower + (N << 32);
				289	Upper += (N >> 32) + (NewLower < Lower);
				290	Lower = NewLower;
				291	};
				292	addWithCarry(P2);
				293	addWithCarry(P3);
				294
				295	// Check whether the upper digit is empty.
				296	if (!Upper)
				297	return std::make_pair(Lower, 0);
				298
				299	// Shift as little as possible to maximize precision.
				300	unsigned LeadingZeros = countLeadingZeros64(Upper);
				301	int16_t Shift = 64 - LeadingZeros;
				302	if (LeadingZeros)
				303	Upper = Upper << LeadingZeros \| Lower >> Shift;
				304	bool ShouldRound = Shift && (Lower & UINT64_C(1) << (Shift - 1));
				305	return getRoundedFloat(Upper, ShouldRound, Shift);
				306	}
				307
				308	//===----------------------------------------------------------------------===//
				309	//
				310	// BlockMass implementation.
				311	//
				312	//===----------------------------------------------------------------------===//
				313	BlockMass &BlockMass::operator*=(const BranchProbability &P) {
				314	uint32_t N = P.getNumerator(), D = P.getDenominator();
				315	assert(D && "divide by 0");
				316	assert(N <= D && "fraction greater than 1");
				317
				318	// Fast path for multiplying by 1.0.
				319	if (!Mass \|\| N == D)
				320	return *this;
				321
				322	// Get as much precision as we can.
				323	int Shift = countLeadingZeros(Mass);
				324	uint64_t ShiftedQuotient = (Mass << Shift) / D;
				325	uint64_t Product = ShiftedQuotient * N >> Shift;
				326
				327	// Now check for what's lost.
				328	uint64_t Left = ShiftedQuotient * (D - N) >> Shift;
				329	uint64_t Lost = Mass - Product - Left;
				330
				331	// TODO: prove this assertion.
				332	assert(Lost <= UINT32_MAX);
				333
				334	// Take the product plus a portion of the spoils.
				335	Mass = Product + Lost * N / D;
				336	return *this;
				337	}
				338
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	339	UnsignedFloat<uint64_t> BlockMass::toFloat() const {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	340	if (isFull())
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	341	return UnsignedFloat<uint64_t>(1, 0);
				342	return UnsignedFloat<uint64_t>(getMass() + 1, -64);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	343	}
				344
				345	void BlockMass::dump() const { print(dbgs()); }
				346
				347	static char getHexDigit(int N) {
				348	assert(N < 16);
				349	if (N < 10)
				350	return '0' + N;
				351	return 'a' + N - 10;
				352	}
				353	raw_ostream &BlockMass::print(raw_ostream &OS) const {
				354	for (int Digits = 0; Digits < 16; ++Digits)
				355	OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
				356	return OS;
				357	}
				358
				359	//===----------------------------------------------------------------------===//
				360	//
				361	// BlockFrequencyInfoImpl implementation.
				362	//
				363	//===----------------------------------------------------------------------===//
				364	namespace {
				365
				366	typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
				367	typedef BlockFrequencyInfoImplBase::Distribution Distribution;
				368	typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
				369	typedef BlockFrequencyInfoImplBase::Float Float;
Duncan P. N. Exon Smith	cc88ebf	2014-04-22 03:31:31 +0000	[diff] [blame]	370	typedef BlockFrequencyInfoImplBase::LoopData LoopData;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	371	typedef BlockFrequencyInfoImplBase::Weight Weight;
				372	typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
				373
				374	/// \brief Dithering mass distributer.
				375	///
				376	/// This class splits up a single mass into portions by weight, dithering to
				377	/// spread out error. No mass is lost. The dithering precision depends on the
				378	/// precision of the product of \a BlockMass and \a BranchProbability.
				379	///
				380	/// The distribution algorithm follows.
				381	///
				382	/// 1. Initialize by saving the sum of the weights in \a RemWeight and the
				383	/// mass to distribute in \a RemMass.
				384	///
				385	/// 2. For each portion:
				386	///
				387	/// 1. Construct a branch probability, P, as the portion's weight divided
				388	/// by the current value of \a RemWeight.
				389	/// 2. Calculate the portion's mass as \a RemMass times P.
				390	/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
				391	/// the current portion's weight and mass.
				392	///
				393	/// Mass is distributed in two ways: full distribution and forward
				394	/// distribution. The latter ignores backedges, and uses the parallel fields
				395	/// \a RemForwardWeight and \a RemForwardMass.
				396	struct DitheringDistributer {
				397	uint32_t RemWeight;
				398	uint32_t RemForwardWeight;
				399
				400	BlockMass RemMass;
				401	BlockMass RemForwardMass;
				402
				403	DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
				404
				405	BlockMass takeLocalMass(uint32_t Weight) {
				406	(void)takeMass(Weight);
				407	return takeForwardMass(Weight);
				408	}
				409	BlockMass takeExitMass(uint32_t Weight) {
				410	(void)takeForwardMass(Weight);
				411	return takeMass(Weight);
				412	}
				413	BlockMass takeBackedgeMass(uint32_t Weight) { return takeMass(Weight); }
				414
				415	private:
				416	BlockMass takeForwardMass(uint32_t Weight);
				417	BlockMass takeMass(uint32_t Weight);
				418	};
				419	}
				420
				421	DitheringDistributer::DitheringDistributer(Distribution &Dist,
				422	const BlockMass &Mass) {
				423	Dist.normalize();
				424	RemWeight = Dist.Total;
				425	RemForwardWeight = Dist.ForwardTotal;
				426	RemMass = Mass;
				427	RemForwardMass = Dist.ForwardTotal ? Mass : BlockMass();
				428	}
				429
				430	BlockMass DitheringDistributer::takeForwardMass(uint32_t Weight) {
				431	// Compute the amount of mass to take.
				432	assert(Weight && "invalid weight");
				433	assert(Weight <= RemForwardWeight);
				434	BlockMass Mass = RemForwardMass * BranchProbability(Weight, RemForwardWeight);
				435
				436	// Decrement totals (dither).
				437	RemForwardWeight -= Weight;
				438	RemForwardMass -= Mass;
				439	return Mass;
				440	}
				441	BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
				442	assert(Weight && "invalid weight");
				443	assert(Weight <= RemWeight);
				444	BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
				445
				446	// Decrement totals (dither).
				447	RemWeight -= Weight;
				448	RemMass -= Mass;
				449	return Mass;
				450	}
				451
				452	void Distribution::add(const BlockNode &Node, uint64_t Amount,
				453	Weight::DistType Type) {
				454	assert(Amount && "invalid weight of 0");
				455	uint64_t NewTotal = Total + Amount;
				456
				457	// Check for overflow. It should be impossible to overflow twice.
				458	bool IsOverflow = NewTotal < Total;
				459	assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
				460	DidOverflow \|= IsOverflow;
				461
				462	// Update the total.
				463	Total = NewTotal;
				464
				465	// Save the weight.
				466	Weight W;
				467	W.TargetNode = Node;
				468	W.Amount = Amount;
				469	W.Type = Type;
				470	Weights.push_back(W);
				471
				472	if (Type == Weight::Backedge)
				473	return;
				474
				475	// Update forward total. Don't worry about overflow here, since then Total
				476	// will exceed 32-bits and they'll both be recomputed in normalize().
				477	ForwardTotal += Amount;
				478	}
				479
				480	static void combineWeight(Weight &W, const Weight &OtherW) {
				481	assert(OtherW.TargetNode.isValid());
				482	if (!W.Amount) {
				483	W = OtherW;
				484	return;
				485	}
				486	assert(W.Type == OtherW.Type);
				487	assert(W.TargetNode == OtherW.TargetNode);
				488	assert(W.Amount < W.Amount + OtherW.Amount);
				489	W.Amount += OtherW.Amount;
				490	}
				491	static void combineWeightsBySorting(WeightList &Weights) {
				492	// Sort so edges to the same node are adjacent.
				493	std::sort(Weights.begin(), Weights.end(),
				494	[](const Weight &L,
				495	const Weight &R) { return L.TargetNode < R.TargetNode; });
				496
				497	// Combine adjacent edges.
				498	WeightList::iterator O = Weights.begin();
				499	for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
				500	++O, (I = L)) {
				501	O = I;
				502
				503	// Find the adjacent weights to the same node.
				504	for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
				505	combineWeight(O, L);
				506	}
				507
				508	// Erase extra entries.
				509	Weights.erase(O, Weights.end());
				510	return;
				511	}
				512	static void combineWeightsByHashing(WeightList &Weights) {
				513	// Collect weights into a DenseMap.
				514	typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
				515	HashTable Combined(NextPowerOf2(2 * Weights.size()));
				516	for (const Weight &W : Weights)
				517	combineWeight(Combined[W.TargetNode.Index], W);
				518
				519	// Check whether anything changed.
				520	if (Weights.size() == Combined.size())
				521	return;
				522
				523	// Fill in the new weights.
				524	Weights.clear();
				525	Weights.reserve(Combined.size());
				526	for (const auto &I : Combined)
				527	Weights.push_back(I.second);
				528	}
				529	static void combineWeights(WeightList &Weights) {
				530	// Use a hash table for many successors to keep this linear.
				531	if (Weights.size() > 128) {
				532	combineWeightsByHashing(Weights);
				533	return;
				534	}
				535
				536	combineWeightsBySorting(Weights);
				537	}
				538	static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
				539	assert(Shift >= 0);
				540	assert(Shift < 64);
				541	if (!Shift)
				542	return N;
				543	return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
				544	}
				545	void Distribution::normalize() {
				546	// Early exit for termination nodes.
				547	if (Weights.empty())
				548	return;
				549
				550	// Only bother if there are multiple successors.
				551	if (Weights.size() > 1)
				552	combineWeights(Weights);
				553
				554	// Early exit when combined into a single successor.
				555	if (Weights.size() == 1) {
				556	Total = 1;
				557	ForwardTotal = Weights.front().Type != Weight::Backedge;
				558	Weights.front().Amount = 1;
				559	return;
				560	}
				561
				562	// Determine how much to shift right so that the total fits into 32-bits.
				563	//
				564	// If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
				565	// for each weight can cause a 32-bit overflow.
				566	int Shift = 0;
				567	if (DidOverflow)
				568	Shift = 33;
				569	else if (Total > UINT32_MAX)
				570	Shift = 33 - countLeadingZeros(Total);
				571
				572	// Early exit if nothing needs to be scaled.
				573	if (!Shift)
				574	return;
				575
				576	// Recompute the total through accumulation (rather than shifting it) so that
				577	// it's accurate after shifting. ForwardTotal is dirty here anyway.
				578	Total = 0;
				579	ForwardTotal = 0;
				580
				581	// Sum the weights to each node and shift right if necessary.
				582	for (Weight &W : Weights) {
				583	// Scale down below UINT32_MAX. Since Shift is larger than necessary, we
				584	// can round here without concern about overflow.
				585	assert(W.TargetNode.isValid());
				586	W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
				587	assert(W.Amount <= UINT32_MAX);
				588
				589	// Update the total.
				590	Total += W.Amount;
				591	if (W.Type == Weight::Backedge)
				592	continue;
				593
				594	// Update the forward total.
				595	ForwardTotal += W.Amount;
				596	}
				597	assert(Total <= UINT32_MAX);
				598	}
				599
				600	void BlockFrequencyInfoImplBase::clear() {
Duncan P. N. Exon Smith	dc2d66e	2014-04-22 03:31:34 +0000	[diff] [blame]	601	// Swap with a default-constructed std::vector, since std::vector<>::clear()
				602	// does not actually clear heap storage.
				603	std::vector<FrequencyData>().swap(Freqs);
				604	std::vector<WorkingData>().swap(Working);
Duncan P. N. Exon Smith	fc7dc93	2014-04-25 04:30:06 +0000	[diff] [blame]	605	Loops.clear();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	606	}
				607
				608	/// \brief Clear all memory not needed downstream.
				609	///
				610	/// Releases all memory not used downstream. In particular, saves Freqs.
				611	static void cleanup(BlockFrequencyInfoImplBase &BFI) {
				612	std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
				613	BFI.clear();
				614	BFI.Freqs = std::move(SavedFreqs);
				615	}
				616
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	617	/// \brief Get the appropriate mass for a possible pseudo-node loop package.
				618	///
				619	/// Get appropriate mass for Node. If Node is a loop-header (whose loop has
				620	/// been packaged), returns the mass of its pseudo-node. If it's a node inside
				621	/// a packaged loop, it returns the loop's pseudo-node.
				622	static BlockMass &getPackageMass(BlockFrequencyInfoImplBase &BFI,
				623	const BlockNode &Node) {
				624	assert(Node.isValid());
Duncan P. N. Exon Smith	2984a64	2014-04-22 03:31:44 +0000	[diff] [blame]	625	assert(!BFI.Working[Node.Index].isPackaged());
Duncan P. N. Exon Smith	e142363	2014-04-22 03:31:37 +0000	[diff] [blame]	626	if (!BFI.Working[Node.Index].isAPackage())
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	627	return BFI.Working[Node.Index].Mass;
				628
				629	return BFI.getLoopPackage(Node).Mass;
				630	}
				631
				632	void BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	633	const LoopData *OuterLoop,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	634	const BlockNode &Pred,
				635	const BlockNode &Succ,
				636	uint64_t Weight) {
				637	if (!Weight)
				638	Weight = 1;
				639
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	640	auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
				641	return OuterLoop && OuterLoop->isHeader(Node);
				642	};
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	643
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	644	#ifndef NDEBUG
				645	auto debugSuccessor = [&](const char *Type, const BlockNode &Resolved) {
				646	dbgs() << " =>"
				647	<< " [" << Type << "] weight = " << Weight;
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	648	if (!isLoopHeader(Succ))
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	649	dbgs() << ", succ = " << getBlockName(Succ);
				650	if (Resolved != Succ)
				651	dbgs() << ", resolved = " << getBlockName(Resolved);
				652	dbgs() << "\n";
				653	};
				654	(void)debugSuccessor;
				655	#endif
				656
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	657	if (isLoopHeader(Succ)) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	658	DEBUG(debugSuccessor("backedge", Succ));
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	659	Dist.addBackedge(OuterLoop->getHeader(), Weight);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	660	return;
				661	}
Duncan P. N. Exon Smith	c9b7cfea	2014-04-25 04:38:12 +0000	[diff] [blame]	662	BlockNode Resolved = getPackagedNode(Succ);
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	663	assert(!isLoopHeader(Resolved));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	664
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	665	if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	666	DEBUG(debugSuccessor(" exit ", Resolved));
				667	Dist.addExit(Resolved, Weight);
				668	return;
				669	}
				670
Duncan P. N. Exon Smith	b3380ea	2014-04-22 03:31:53 +0000	[diff] [blame]	671	if (Resolved < Pred) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	672	// Irreducible backedge. Skip this edge in the distribution.
				673	DEBUG(debugSuccessor("skipped ", Resolved));
				674	return;
				675	}
				676
				677	DEBUG(debugSuccessor(" local ", Resolved));
				678	Dist.addLocal(Resolved, Weight);
				679	}
				680
				681	void BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	682	const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	683	// Copy the exit map into Dist.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	684	for (const auto &I : Loop.Exits)
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	685	addToDist(Dist, OuterLoop, Loop.getHeader(), I.first, I.second.getMass());
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	686
				687	// We don't need this map any more. Clear it to prevent quadratic memory
				688	// usage in deeply nested loops with irreducible control flow.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	689	Loop.Exits.clear();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	690	}
				691
				692	/// \brief Get the maximum allowed loop scale.
				693	///
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	694	/// Gives the maximum number of estimated iterations allowed for a loop. Very
				695	/// large numbers cause problems downstream (even within 64-bits).
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	696	static Float getMaxLoopScale() { return Float(1, 12); }
				697
				698	/// \brief Compute the loop scale for a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	699	void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	700	// Compute loop scale.
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	701	DEBUG(dbgs() << "compute-loop-scale: " << getBlockName(Loop.getHeader())
				702	<< "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	703
				704	// LoopScale == 1 / ExitMass
				705	// ExitMass == HeadMass - BackedgeMass
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	706	BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	707
				708	// Block scale stores the inverse of the scale.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	709	Loop.Scale = ExitMass.toFloat().inverse();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	710
				711	DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	712	<< " - " << Loop.BackedgeMass << ")\n"
				713	<< " - scale = " << Loop.Scale << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	714
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	715	if (Loop.Scale > getMaxLoopScale()) {
				716	Loop.Scale = getMaxLoopScale();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	717	DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
				718	}
				719	}
				720
				721	/// \brief Package up a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	722	void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	723	DEBUG(dbgs() << "packaging-loop: " << getBlockName(Loop.getHeader()) << "\n");
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	724	Loop.IsPackaged = true;
Duncan P. N. Exon Smith	2984a64	2014-04-22 03:31:44 +0000	[diff] [blame]	725	DEBUG(for (const BlockNode &M
Duncan P. N. Exon Smith	1cab8a0	2014-04-25 04:38:09 +0000	[diff] [blame]	726	: Loop.members()) {
Duncan P. N. Exon Smith	2984a64	2014-04-22 03:31:44 +0000	[diff] [blame]	727	dbgs() << " - node: " << getBlockName(M.Index) << "\n";
				728	});
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	729	}
				730
				731	void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	732	LoopData *OuterLoop,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	733	Distribution &Dist) {
				734	BlockMass Mass = getPackageMass(*this, Source);
				735	DEBUG(dbgs() << " => mass: " << Mass
				736	<< " ( general \| forward )\n");
				737
				738	// Distribute mass to successors as laid out in Dist.
				739	DitheringDistributer D(Dist, Mass);
				740
				741	#ifndef NDEBUG
				742	auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
				743	const char *Desc) {
				744	dbgs() << " => assign " << M << " (" << D.RemMass << "\|"
				745	<< D.RemForwardMass << ")";
				746	if (Desc)
				747	dbgs() << " [" << Desc << "]";
				748	if (T.isValid())
				749	dbgs() << " to " << getBlockName(T);
				750	dbgs() << "\n";
				751	};
				752	(void)debugAssign;
				753	#endif
				754
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	755	for (const Weight &W : Dist.Weights) {
				756	// Check for a local edge (forward and non-exit).
				757	if (W.Type == Weight::Local) {
				758	BlockMass Local = D.takeLocalMass(W.Amount);
				759	getPackageMass(*this, W.TargetNode) += Local;
				760	DEBUG(debugAssign(W.TargetNode, Local, nullptr));
				761	continue;
				762	}
				763
				764	// Backedges and exits only make sense if we're processing a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	765	assert(OuterLoop && "backedge or exit outside of loop");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	766
				767	// Check for a backedge.
				768	if (W.Type == Weight::Backedge) {
				769	BlockMass Back = D.takeBackedgeMass(W.Amount);
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	770	OuterLoop->BackedgeMass += Back;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	771	DEBUG(debugAssign(BlockNode(), Back, "back"));
				772	continue;
				773	}
				774
				775	// This must be an exit.
				776	assert(W.Type == Weight::Exit);
				777	BlockMass Exit = D.takeExitMass(W.Amount);
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	778	OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Exit));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	779	DEBUG(debugAssign(W.TargetNode, Exit, "exit"));
				780	}
				781	}
				782
				783	static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
				784	const Float &Min, const Float &Max) {
				785	// Scale the Factor to a size that creates integers. Ideally, integers would
				786	// be scaled so that Max == UINT64_MAX so that they can be best
				787	// differentiated. However, the register allocator currently deals poorly
				788	// with large numbers. Instead, push Min up a little from 1 to give some
				789	// room to differentiate small, unequal numbers.
				790	//
				791	// TODO: fix issues downstream so that ScalingFactor can be Float(1,64)/Max.
				792	Float ScalingFactor = Min.inverse();
				793	if ((Max / Min).lg() < 60)
				794	ScalingFactor <<= 3;
				795
				796	// Translate the floats to integers.
				797	DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
				798	<< ", factor = " << ScalingFactor << "\n");
				799	for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
				800	Float Scaled = BFI.Freqs[Index].Floating * ScalingFactor;
				801	BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
				802	DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
				803	<< BFI.Freqs[Index].Floating << ", scaled = " << Scaled
				804	<< ", int = " << BFI.Freqs[Index].Integer << "\n");
				805	}
				806	}
				807
				808	static void scaleBlockData(BlockFrequencyInfoImplBase &BFI,
				809	const BlockNode &Node,
Duncan P. N. Exon Smith	cc88ebf	2014-04-22 03:31:31 +0000	[diff] [blame]	810	const LoopData &Loop) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	811	Float F = Loop.Mass.toFloat() * Loop.Scale;
				812
				813	Float &Current = BFI.Freqs[Node.Index].Floating;
				814	Float Updated = Current * F;
				815
				816	DEBUG(dbgs() << " - " << BFI.getBlockName(Node) << ": " << Current << " => "
				817	<< Updated << "\n");
				818
				819	Current = Updated;
				820	}
				821
				822	/// \brief Unwrap a loop package.
				823	///
				824	/// Visits all the members of a loop, adjusting their BlockData according to
				825	/// the loop's pseudo-node.
				826	static void unwrapLoopPackage(BlockFrequencyInfoImplBase &BFI,
Duncan P. N. Exon Smith	da0b21c	2014-04-25 04:38:23 +0000	[diff] [blame^]	827	LoopData &LoopPackage) {
				828	BlockNode Head = LoopPackage.getHeader();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	829	DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getBlockName(Head)
				830	<< ": mass = " << LoopPackage.Mass
				831	<< ", scale = " << LoopPackage.Scale << "\n");
				832	scaleBlockData(BFI, Head, LoopPackage);
				833
				834	// Propagate the head scale through the loop. Since members are visited in
				835	// RPO, the head scale will be updated by the loop scale first, and then the
				836	// final head scale will be used for updated the rest of the members.
Duncan P. N. Exon Smith	1cab8a0	2014-04-25 04:38:09 +0000	[diff] [blame]	837	for (const BlockNode &M : LoopPackage.members()) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	838	const FrequencyData &HeadData = BFI.Freqs[Head.Index];
				839	FrequencyData &Freqs = BFI.Freqs[M.Index];
				840	Float NewFreq = Freqs.Floating * HeadData.Floating;
				841	DEBUG(dbgs() << " - " << BFI.getBlockName(M) << ": " << Freqs.Floating
				842	<< " => " << NewFreq << "\n");
				843	Freqs.Floating = NewFreq;
				844	}
				845	}
				846
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	847	void BlockFrequencyInfoImplBase::unwrapLoops() {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	848	// Set initial frequencies from loop-local masses.
				849	for (size_t Index = 0; Index < Working.size(); ++Index)
				850	Freqs[Index].Floating = Working[Index].Mass.toFloat();
				851
Duncan P. N. Exon Smith	da0b21c	2014-04-25 04:38:23 +0000	[diff] [blame^]	852	for (LoopData &Loop : Loops)
				853	unwrapLoopPackage(*this, Loop);
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	854	}
				855
				856	void BlockFrequencyInfoImplBase::finalizeMetrics() {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	857	// Unwrap loop packages in reverse post-order, tracking min and max
				858	// frequencies.
				859	auto Min = Float::getLargest();
				860	auto Max = Float::getZero();
				861	for (size_t Index = 0; Index < Working.size(); ++Index) {
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	862	// Update min/max scale.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	863	Min = std::min(Min, Freqs[Index].Floating);
				864	Max = std::max(Max, Freqs[Index].Floating);
				865	}
				866
				867	// Convert to integers.
				868	convertFloatingToInteger(*this, Min, Max);
				869
				870	// Clean up data structures.
				871	cleanup(*this);
				872
				873	// Print out the final stats.
				874	DEBUG(dump());
				875	}
				876
				877	BlockFrequency
				878	BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
				879	if (!Node.isValid())
				880	return 0;
				881	return Freqs[Node.Index].Integer;
				882	}
				883	Float
				884	BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
				885	if (!Node.isValid())
				886	return Float::getZero();
				887	return Freqs[Node.Index].Floating;
				888	}
				889
				890	std::string
				891	BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
				892	return std::string();
				893	}
				894
				895	raw_ostream &
				896	BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
				897	const BlockNode &Node) const {
				898	return OS << getFloatingBlockFreq(Node);
				899	}
				900
				901	raw_ostream &
				902	BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
				903	const BlockFrequency &Freq) const {
				904	Float Block(Freq.getFrequency(), 0);
				905	Float Entry(getEntryFreq(), 0);
				906
				907	return OS << Block / Entry;
				908	}