Blame - llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp - toolchain/llvm-project

blob: 744bbe2fe956a6524a4d6192b3cab43f4fb0c90c [file] [log] [blame]

Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	1	//===- BlockFrequencyInfoImpl.cpp - Block Frequency Info Implementation ---===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// Loops should be simplified before this analysis.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	14	#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
				15	#include "llvm/ADT/APFloat.h"
				16	#include "llvm/Support/raw_ostream.h"
				17	#include <deque>
				18
				19	using namespace llvm;
				20
Chandler Carruth	1b9dde0	2014-04-22 02:02:50 +0000	[diff] [blame]	21	#define DEBUG_TYPE "block-freq"
				22
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	23	//===----------------------------------------------------------------------===//
				24	//
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	25	// UnsignedFloat implementation.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	26	//
				27	//===----------------------------------------------------------------------===//
				28	#ifndef _MSC_VER
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	29	const int32_t UnsignedFloatBase::MaxExponent;
				30	const int32_t UnsignedFloatBase::MinExponent;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	31	#endif
				32
				33	static void appendDigit(std::string &Str, unsigned D) {
				34	assert(D < 10);
				35	Str += '0' + D % 10;
				36	}
				37
				38	static void appendNumber(std::string &Str, uint64_t N) {
				39	while (N) {
				40	appendDigit(Str, N % 10);
				41	N /= 10;
				42	}
				43	}
				44
				45	static bool doesRoundUp(char Digit) {
				46	switch (Digit) {
				47	case '5':
				48	case '6':
				49	case '7':
				50	case '8':
				51	case '9':
				52	return true;
				53	default:
				54	return false;
				55	}
				56	}
				57
				58	static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) {
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	59	assert(E >= UnsignedFloatBase::MinExponent);
				60	assert(E <= UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	61
				62	// Find a new E, but don't let it increase past MaxExponent.
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	63	int LeadingZeros = UnsignedFloatBase::countLeadingZeros64(D);
				64	int NewE = std::min(UnsignedFloatBase::MaxExponent, E + 63 - LeadingZeros);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	65	int Shift = 63 - (NewE - E);
				66	assert(Shift <= LeadingZeros);
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	67	assert(Shift == LeadingZeros \|\| NewE == UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	68	D <<= Shift;
				69	E = NewE;
				70
				71	// Check for a denormal.
				72	unsigned AdjustedE = E + 16383;
				73	if (!(D >> 63)) {
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	74	assert(E == UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	75	AdjustedE = 0;
				76	}
				77
				78	// Build the float and print it.
				79	uint64_t RawBits[2] = {D, AdjustedE};
				80	APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits));
				81	SmallVector<char, 24> Chars;
				82	Float.toString(Chars, Precision, 0);
				83	return std::string(Chars.begin(), Chars.end());
				84	}
				85
				86	static std::string stripTrailingZeros(const std::string &Float) {
				87	size_t NonZero = Float.find_last_not_of('0');
				88	assert(NonZero != std::string::npos && "no . in floating point string");
				89
				90	if (Float[NonZero] == '.')
				91	++NonZero;
				92
				93	return Float.substr(0, NonZero + 1);
				94	}
				95
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	96	std::string UnsignedFloatBase::toString(uint64_t D, int16_t E, int Width,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	97	unsigned Precision) {
				98	if (!D)
				99	return "0.0";
				100
				101	// Canonicalize exponent and digits.
				102	uint64_t Above0 = 0;
				103	uint64_t Below0 = 0;
				104	uint64_t Extra = 0;
				105	int ExtraShift = 0;
				106	if (E == 0) {
				107	Above0 = D;
				108	} else if (E > 0) {
				109	if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) {
				110	D <<= Shift;
				111	E -= Shift;
				112
				113	if (!E)
				114	Above0 = D;
				115	}
				116	} else if (E > -64) {
				117	Above0 = D >> -E;
				118	Below0 = D << (64 + E);
				119	} else if (E > -120) {
				120	Below0 = D >> (-E - 64);
				121	Extra = D << (128 + E);
				122	ExtraShift = -64 - E;
				123	}
				124
				125	// Fall back on APFloat for very small and very large numbers.
				126	if (!Above0 && !Below0)
				127	return toStringAPFloat(D, E, Precision);
				128
				129	// Append the digits before the decimal.
				130	std::string Str;
				131	size_t DigitsOut = 0;
				132	if (Above0) {
				133	appendNumber(Str, Above0);
				134	DigitsOut = Str.size();
				135	} else
				136	appendDigit(Str, 0);
				137	std::reverse(Str.begin(), Str.end());
				138
				139	// Return early if there's nothing after the decimal.
				140	if (!Below0)
				141	return Str + ".0";
				142
				143	// Append the decimal and beyond.
				144	Str += '.';
				145	uint64_t Error = UINT64_C(1) << (64 - Width);
				146
				147	// We need to shift Below0 to the right to make space for calculating
				148	// digits. Save the precision we're losing in Extra.
				149	Extra = (Below0 & 0xf) << 56 \| (Extra >> 8);
				150	Below0 >>= 4;
				151	size_t SinceDot = 0;
				152	size_t AfterDot = Str.size();
				153	do {
				154	if (ExtraShift) {
				155	--ExtraShift;
				156	Error *= 5;
				157	} else
				158	Error *= 10;
				159
				160	Below0 *= 10;
				161	Extra *= 10;
				162	Below0 += (Extra >> 60);
				163	Extra = Extra & (UINT64_MAX >> 4);
				164	appendDigit(Str, Below0 >> 60);
				165	Below0 = Below0 & (UINT64_MAX >> 4);
				166	if (DigitsOut \|\| Str.back() != '0')
				167	++DigitsOut;
				168	++SinceDot;
				169	} while (Error && (Below0 << 4 \| Extra >> 60) >= Error / 2 &&
				170	(!Precision \|\| DigitsOut <= Precision \|\| SinceDot < 2));
				171
				172	// Return early for maximum precision.
				173	if (!Precision \|\| DigitsOut <= Precision)
				174	return stripTrailingZeros(Str);
				175
				176	// Find where to truncate.
				177	size_t Truncate =
				178	std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1);
				179
				180	// Check if there's anything to truncate.
				181	if (Truncate >= Str.size())
				182	return stripTrailingZeros(Str);
				183
				184	bool Carry = doesRoundUp(Str[Truncate]);
				185	if (!Carry)
				186	return stripTrailingZeros(Str.substr(0, Truncate));
				187
				188	// Round with the first truncated digit.
				189	for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend();
				190	I != E; ++I) {
				191	if (*I == '.')
				192	continue;
				193	if (*I == '9') {
				194	*I = '0';
				195	continue;
				196	}
				197
				198	++*I;
				199	Carry = false;
				200	break;
				201	}
				202
				203	// Add "1" in front if we still need to carry.
				204	return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate));
				205	}
				206
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	207	raw_ostream &UnsignedFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	208	int Width, unsigned Precision) {
				209	return OS << toString(D, E, Width, Precision);
				210	}
				211
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	212	void UnsignedFloatBase::dump(uint64_t D, int16_t E, int Width) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	213	print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E
				214	<< "]";
				215	}
				216
				217	static std::pair<uint64_t, int16_t>
				218	getRoundedFloat(uint64_t N, bool ShouldRound, int64_t Shift) {
				219	if (ShouldRound)
				220	if (!++N)
				221	// Rounding caused an overflow.
				222	return std::make_pair(UINT64_C(1), Shift + 64);
				223	return std::make_pair(N, Shift);
				224	}
				225
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	226	std::pair<uint64_t, int16_t> UnsignedFloatBase::divide64(uint64_t Dividend,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	227	uint64_t Divisor) {
				228	// Input should be sanitized.
				229	assert(Divisor);
				230	assert(Dividend);
				231
				232	// Minimize size of divisor.
				233	int16_t Shift = 0;
				234	if (int Zeros = countTrailingZeros(Divisor)) {
				235	Shift -= Zeros;
				236	Divisor >>= Zeros;
				237	}
				238
				239	// Check for powers of two.
				240	if (Divisor == 1)
				241	return std::make_pair(Dividend, Shift);
				242
				243	// Maximize size of dividend.
				244	if (int Zeros = countLeadingZeros64(Dividend)) {
				245	Shift -= Zeros;
				246	Dividend <<= Zeros;
				247	}
				248
				249	// Start with the result of a divide.
				250	uint64_t Quotient = Dividend / Divisor;
				251	Dividend %= Divisor;
				252
				253	// Continue building the quotient with long division.
				254	//
				255	// TODO: continue with largers digits.
				256	while (!(Quotient >> 63) && Dividend) {
				257	// Shift Dividend, and check for overflow.
				258	bool IsOverflow = Dividend >> 63;
				259	Dividend <<= 1;
				260	--Shift;
				261
				262	// Divide.
				263	bool DoesDivide = IsOverflow \|\| Divisor <= Dividend;
				264	Quotient = (Quotient << 1) \| uint64_t(DoesDivide);
				265	Dividend -= DoesDivide ? Divisor : 0;
				266	}
				267
				268	// Round.
				269	if (Dividend >= getHalf(Divisor))
				270	if (!++Quotient)
				271	// Rounding caused an overflow in Quotient.
				272	return std::make_pair(UINT64_C(1), Shift + 64);
				273
				274	return getRoundedFloat(Quotient, Dividend >= getHalf(Divisor), Shift);
				275	}
				276
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	277	std::pair<uint64_t, int16_t> UnsignedFloatBase::multiply64(uint64_t L,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	278	uint64_t R) {
				279	// Separate into two 32-bit digits (U.L).
				280	uint64_t UL = L >> 32, LL = L & UINT32_MAX, UR = R >> 32, LR = R & UINT32_MAX;
				281
				282	// Compute cross products.
				283	uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR;
				284
				285	// Sum into two 64-bit digits.
				286	uint64_t Upper = P1, Lower = P4;
				287	auto addWithCarry = [&](uint64_t N) {
				288	uint64_t NewLower = Lower + (N << 32);
				289	Upper += (N >> 32) + (NewLower < Lower);
				290	Lower = NewLower;
				291	};
				292	addWithCarry(P2);
				293	addWithCarry(P3);
				294
				295	// Check whether the upper digit is empty.
				296	if (!Upper)
				297	return std::make_pair(Lower, 0);
				298
				299	// Shift as little as possible to maximize precision.
				300	unsigned LeadingZeros = countLeadingZeros64(Upper);
				301	int16_t Shift = 64 - LeadingZeros;
				302	if (LeadingZeros)
				303	Upper = Upper << LeadingZeros \| Lower >> Shift;
				304	bool ShouldRound = Shift && (Lower & UINT64_C(1) << (Shift - 1));
				305	return getRoundedFloat(Upper, ShouldRound, Shift);
				306	}
				307
				308	//===----------------------------------------------------------------------===//
				309	//
				310	// BlockMass implementation.
				311	//
				312	//===----------------------------------------------------------------------===//
				313	BlockMass &BlockMass::operator*=(const BranchProbability &P) {
				314	uint32_t N = P.getNumerator(), D = P.getDenominator();
				315	assert(D && "divide by 0");
				316	assert(N <= D && "fraction greater than 1");
				317
				318	// Fast path for multiplying by 1.0.
				319	if (!Mass \|\| N == D)
				320	return *this;
				321
				322	// Get as much precision as we can.
				323	int Shift = countLeadingZeros(Mass);
				324	uint64_t ShiftedQuotient = (Mass << Shift) / D;
				325	uint64_t Product = ShiftedQuotient * N >> Shift;
				326
				327	// Now check for what's lost.
				328	uint64_t Left = ShiftedQuotient * (D - N) >> Shift;
				329	uint64_t Lost = Mass - Product - Left;
				330
				331	// TODO: prove this assertion.
				332	assert(Lost <= UINT32_MAX);
				333
				334	// Take the product plus a portion of the spoils.
				335	Mass = Product + Lost * N / D;
				336	return *this;
				337	}
				338
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	339	UnsignedFloat<uint64_t> BlockMass::toFloat() const {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	340	if (isFull())
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	341	return UnsignedFloat<uint64_t>(1, 0);
				342	return UnsignedFloat<uint64_t>(getMass() + 1, -64);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	343	}
				344
				345	void BlockMass::dump() const { print(dbgs()); }
				346
				347	static char getHexDigit(int N) {
				348	assert(N < 16);
				349	if (N < 10)
				350	return '0' + N;
				351	return 'a' + N - 10;
				352	}
				353	raw_ostream &BlockMass::print(raw_ostream &OS) const {
				354	for (int Digits = 0; Digits < 16; ++Digits)
				355	OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
				356	return OS;
				357	}
				358
				359	//===----------------------------------------------------------------------===//
				360	//
				361	// BlockFrequencyInfoImpl implementation.
				362	//
				363	//===----------------------------------------------------------------------===//
				364	namespace {
				365
				366	typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
				367	typedef BlockFrequencyInfoImplBase::Distribution Distribution;
				368	typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
				369	typedef BlockFrequencyInfoImplBase::Float Float;
Duncan P. N. Exon Smith	cc88ebf	2014-04-22 03:31:31 +0000	[diff] [blame]	370	typedef BlockFrequencyInfoImplBase::LoopData LoopData;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	371	typedef BlockFrequencyInfoImplBase::Weight Weight;
				372	typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
				373
				374	/// \brief Dithering mass distributer.
				375	///
				376	/// This class splits up a single mass into portions by weight, dithering to
				377	/// spread out error. No mass is lost. The dithering precision depends on the
				378	/// precision of the product of \a BlockMass and \a BranchProbability.
				379	///
				380	/// The distribution algorithm follows.
				381	///
				382	/// 1. Initialize by saving the sum of the weights in \a RemWeight and the
				383	/// mass to distribute in \a RemMass.
				384	///
				385	/// 2. For each portion:
				386	///
				387	/// 1. Construct a branch probability, P, as the portion's weight divided
				388	/// by the current value of \a RemWeight.
				389	/// 2. Calculate the portion's mass as \a RemMass times P.
				390	/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
				391	/// the current portion's weight and mass.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	392	struct DitheringDistributer {
				393	uint32_t RemWeight;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	394	BlockMass RemMass;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	395
				396	DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
				397
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	398	BlockMass takeMass(uint32_t Weight);
				399	};
				400	}
				401
				402	DitheringDistributer::DitheringDistributer(Distribution &Dist,
				403	const BlockMass &Mass) {
				404	Dist.normalize();
				405	RemWeight = Dist.Total;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	406	RemMass = Mass;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	407	}
				408
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	409	BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
				410	assert(Weight && "invalid weight");
				411	assert(Weight <= RemWeight);
				412	BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
				413
				414	// Decrement totals (dither).
				415	RemWeight -= Weight;
				416	RemMass -= Mass;
				417	return Mass;
				418	}
				419
				420	void Distribution::add(const BlockNode &Node, uint64_t Amount,
				421	Weight::DistType Type) {
				422	assert(Amount && "invalid weight of 0");
				423	uint64_t NewTotal = Total + Amount;
				424
				425	// Check for overflow. It should be impossible to overflow twice.
				426	bool IsOverflow = NewTotal < Total;
				427	assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
				428	DidOverflow \|= IsOverflow;
				429
				430	// Update the total.
				431	Total = NewTotal;
				432
				433	// Save the weight.
				434	Weight W;
				435	W.TargetNode = Node;
				436	W.Amount = Amount;
				437	W.Type = Type;
				438	Weights.push_back(W);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	439	}
				440
				441	static void combineWeight(Weight &W, const Weight &OtherW) {
				442	assert(OtherW.TargetNode.isValid());
				443	if (!W.Amount) {
				444	W = OtherW;
				445	return;
				446	}
				447	assert(W.Type == OtherW.Type);
				448	assert(W.TargetNode == OtherW.TargetNode);
Duncan P. N. Exon Smith	ebf7626	2014-04-25 04:38:40 +0000	[diff] [blame]	449	assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	450	W.Amount += OtherW.Amount;
				451	}
				452	static void combineWeightsBySorting(WeightList &Weights) {
				453	// Sort so edges to the same node are adjacent.
				454	std::sort(Weights.begin(), Weights.end(),
				455	[](const Weight &L,
				456	const Weight &R) { return L.TargetNode < R.TargetNode; });
				457
				458	// Combine adjacent edges.
				459	WeightList::iterator O = Weights.begin();
				460	for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
				461	++O, (I = L)) {
				462	O = I;
				463
				464	// Find the adjacent weights to the same node.
				465	for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
				466	combineWeight(O, L);
				467	}
				468
				469	// Erase extra entries.
				470	Weights.erase(O, Weights.end());
				471	return;
				472	}
				473	static void combineWeightsByHashing(WeightList &Weights) {
				474	// Collect weights into a DenseMap.
				475	typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
				476	HashTable Combined(NextPowerOf2(2 * Weights.size()));
				477	for (const Weight &W : Weights)
				478	combineWeight(Combined[W.TargetNode.Index], W);
				479
				480	// Check whether anything changed.
				481	if (Weights.size() == Combined.size())
				482	return;
				483
				484	// Fill in the new weights.
				485	Weights.clear();
				486	Weights.reserve(Combined.size());
				487	for (const auto &I : Combined)
				488	Weights.push_back(I.second);
				489	}
				490	static void combineWeights(WeightList &Weights) {
				491	// Use a hash table for many successors to keep this linear.
				492	if (Weights.size() > 128) {
				493	combineWeightsByHashing(Weights);
				494	return;
				495	}
				496
				497	combineWeightsBySorting(Weights);
				498	}
				499	static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
				500	assert(Shift >= 0);
				501	assert(Shift < 64);
				502	if (!Shift)
				503	return N;
				504	return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
				505	}
				506	void Distribution::normalize() {
				507	// Early exit for termination nodes.
				508	if (Weights.empty())
				509	return;
				510
				511	// Only bother if there are multiple successors.
				512	if (Weights.size() > 1)
				513	combineWeights(Weights);
				514
				515	// Early exit when combined into a single successor.
				516	if (Weights.size() == 1) {
				517	Total = 1;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	518	Weights.front().Amount = 1;
				519	return;
				520	}
				521
				522	// Determine how much to shift right so that the total fits into 32-bits.
				523	//
				524	// If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
				525	// for each weight can cause a 32-bit overflow.
				526	int Shift = 0;
				527	if (DidOverflow)
				528	Shift = 33;
				529	else if (Total > UINT32_MAX)
				530	Shift = 33 - countLeadingZeros(Total);
				531
				532	// Early exit if nothing needs to be scaled.
				533	if (!Shift)
				534	return;
				535
				536	// Recompute the total through accumulation (rather than shifting it) so that
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame^]	537	// it's accurate after shifting.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	538	Total = 0;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	539
				540	// Sum the weights to each node and shift right if necessary.
				541	for (Weight &W : Weights) {
				542	// Scale down below UINT32_MAX. Since Shift is larger than necessary, we
				543	// can round here without concern about overflow.
				544	assert(W.TargetNode.isValid());
				545	W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
				546	assert(W.Amount <= UINT32_MAX);
				547
				548	// Update the total.
				549	Total += W.Amount;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	550	}
				551	assert(Total <= UINT32_MAX);
				552	}
				553
				554	void BlockFrequencyInfoImplBase::clear() {
Duncan P. N. Exon Smith	dc2d66e	2014-04-22 03:31:34 +0000	[diff] [blame]	555	// Swap with a default-constructed std::vector, since std::vector<>::clear()
				556	// does not actually clear heap storage.
				557	std::vector<FrequencyData>().swap(Freqs);
				558	std::vector<WorkingData>().swap(Working);
Duncan P. N. Exon Smith	fc7dc93	2014-04-25 04:30:06 +0000	[diff] [blame]	559	Loops.clear();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	560	}
				561
				562	/// \brief Clear all memory not needed downstream.
				563	///
				564	/// Releases all memory not used downstream. In particular, saves Freqs.
				565	static void cleanup(BlockFrequencyInfoImplBase &BFI) {
				566	std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
				567	BFI.clear();
				568	BFI.Freqs = std::move(SavedFreqs);
				569	}
				570
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	571	/// \brief Get the appropriate mass for a possible pseudo-node loop package.
				572	///
				573	/// Get appropriate mass for Node. If Node is a loop-header (whose loop has
				574	/// been packaged), returns the mass of its pseudo-node. If it's a node inside
				575	/// a packaged loop, it returns the loop's pseudo-node.
				576	static BlockMass &getPackageMass(BlockFrequencyInfoImplBase &BFI,
				577	const BlockNode &Node) {
				578	assert(Node.isValid());
Duncan P. N. Exon Smith	2984a64	2014-04-22 03:31:44 +0000	[diff] [blame]	579	assert(!BFI.Working[Node.Index].isPackaged());
Duncan P. N. Exon Smith	e142363	2014-04-22 03:31:37 +0000	[diff] [blame]	580	if (!BFI.Working[Node.Index].isAPackage())
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	581	return BFI.Working[Node.Index].Mass;
				582
				583	return BFI.getLoopPackage(Node).Mass;
				584	}
				585
				586	void BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	587	const LoopData *OuterLoop,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	588	const BlockNode &Pred,
				589	const BlockNode &Succ,
				590	uint64_t Weight) {
				591	if (!Weight)
				592	Weight = 1;
				593
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	594	auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
				595	return OuterLoop && OuterLoop->isHeader(Node);
				596	};
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	597
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	598	#ifndef NDEBUG
				599	auto debugSuccessor = [&](const char *Type, const BlockNode &Resolved) {
				600	dbgs() << " =>"
				601	<< " [" << Type << "] weight = " << Weight;
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	602	if (!isLoopHeader(Succ))
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	603	dbgs() << ", succ = " << getBlockName(Succ);
				604	if (Resolved != Succ)
				605	dbgs() << ", resolved = " << getBlockName(Resolved);
				606	dbgs() << "\n";
				607	};
				608	(void)debugSuccessor;
				609	#endif
				610
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	611	if (isLoopHeader(Succ)) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	612	DEBUG(debugSuccessor("backedge", Succ));
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	613	Dist.addBackedge(OuterLoop->getHeader(), Weight);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	614	return;
				615	}
Duncan P. N. Exon Smith	c9b7cfea	2014-04-25 04:38:12 +0000	[diff] [blame]	616	BlockNode Resolved = getPackagedNode(Succ);
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	617	assert(!isLoopHeader(Resolved));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	618
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	619	if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	620	DEBUG(debugSuccessor(" exit ", Resolved));
				621	Dist.addExit(Resolved, Weight);
				622	return;
				623	}
				624
Duncan P. N. Exon Smith	b3380ea	2014-04-22 03:31:53 +0000	[diff] [blame]	625	if (Resolved < Pred) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	626	// Irreducible backedge. Skip this edge in the distribution.
				627	DEBUG(debugSuccessor("skipped ", Resolved));
				628	return;
				629	}
				630
				631	DEBUG(debugSuccessor(" local ", Resolved));
				632	Dist.addLocal(Resolved, Weight);
				633	}
				634
				635	void BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	636	const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	637	// Copy the exit map into Dist.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	638	for (const auto &I : Loop.Exits)
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	639	addToDist(Dist, OuterLoop, Loop.getHeader(), I.first, I.second.getMass());
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	640
				641	// We don't need this map any more. Clear it to prevent quadratic memory
				642	// usage in deeply nested loops with irreducible control flow.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	643	Loop.Exits.clear();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	644	}
				645
				646	/// \brief Get the maximum allowed loop scale.
				647	///
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	648	/// Gives the maximum number of estimated iterations allowed for a loop. Very
				649	/// large numbers cause problems downstream (even within 64-bits).
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	650	static Float getMaxLoopScale() { return Float(1, 12); }
				651
				652	/// \brief Compute the loop scale for a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	653	void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	654	// Compute loop scale.
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	655	DEBUG(dbgs() << "compute-loop-scale: " << getBlockName(Loop.getHeader())
				656	<< "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	657
				658	// LoopScale == 1 / ExitMass
				659	// ExitMass == HeadMass - BackedgeMass
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	660	BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	661
				662	// Block scale stores the inverse of the scale.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	663	Loop.Scale = ExitMass.toFloat().inverse();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	664
				665	DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	666	<< " - " << Loop.BackedgeMass << ")\n"
				667	<< " - scale = " << Loop.Scale << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	668
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	669	if (Loop.Scale > getMaxLoopScale()) {
				670	Loop.Scale = getMaxLoopScale();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	671	DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
				672	}
				673	}
				674
				675	/// \brief Package up a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	676	void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	677	DEBUG(dbgs() << "packaging-loop: " << getBlockName(Loop.getHeader()) << "\n");
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	678	Loop.IsPackaged = true;
Duncan P. N. Exon Smith	2984a64	2014-04-22 03:31:44 +0000	[diff] [blame]	679	DEBUG(for (const BlockNode &M
Duncan P. N. Exon Smith	1cab8a0	2014-04-25 04:38:09 +0000	[diff] [blame]	680	: Loop.members()) {
Duncan P. N. Exon Smith	2984a64	2014-04-22 03:31:44 +0000	[diff] [blame]	681	dbgs() << " - node: " << getBlockName(M.Index) << "\n";
				682	});
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	683	}
				684
				685	void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	686	LoopData *OuterLoop,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	687	Distribution &Dist) {
				688	BlockMass Mass = getPackageMass(*this, Source);
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame^]	689	DEBUG(dbgs() << " => mass: " << Mass << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	690
				691	// Distribute mass to successors as laid out in Dist.
				692	DitheringDistributer D(Dist, Mass);
				693
				694	#ifndef NDEBUG
				695	auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
				696	const char *Desc) {
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame^]	697	dbgs() << " => assign " << M << " (" << D.RemMass << ")";
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	698	if (Desc)
				699	dbgs() << " [" << Desc << "]";
				700	if (T.isValid())
				701	dbgs() << " to " << getBlockName(T);
				702	dbgs() << "\n";
				703	};
				704	(void)debugAssign;
				705	#endif
				706
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	707	for (const Weight &W : Dist.Weights) {
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame^]	708	// Check for a local edge (non-backedge and non-exit).
				709	BlockMass Taken = D.takeMass(W.Amount);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	710	if (W.Type == Weight::Local) {
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame^]	711	getPackageMass(*this, W.TargetNode) += Taken;
				712	DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	713	continue;
				714	}
				715
				716	// Backedges and exits only make sense if we're processing a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	717	assert(OuterLoop && "backedge or exit outside of loop");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	718
				719	// Check for a backedge.
				720	if (W.Type == Weight::Backedge) {
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame^]	721	OuterLoop->BackedgeMass += Taken;
				722	DEBUG(debugAssign(BlockNode(), Taken, "back"));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	723	continue;
				724	}
				725
				726	// This must be an exit.
				727	assert(W.Type == Weight::Exit);
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame^]	728	OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
				729	DEBUG(debugAssign(W.TargetNode, Taken, "exit"));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	730	}
				731	}
				732
				733	static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
				734	const Float &Min, const Float &Max) {
				735	// Scale the Factor to a size that creates integers. Ideally, integers would
				736	// be scaled so that Max == UINT64_MAX so that they can be best
				737	// differentiated. However, the register allocator currently deals poorly
				738	// with large numbers. Instead, push Min up a little from 1 to give some
				739	// room to differentiate small, unequal numbers.
				740	//
				741	// TODO: fix issues downstream so that ScalingFactor can be Float(1,64)/Max.
				742	Float ScalingFactor = Min.inverse();
				743	if ((Max / Min).lg() < 60)
				744	ScalingFactor <<= 3;
				745
				746	// Translate the floats to integers.
				747	DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
				748	<< ", factor = " << ScalingFactor << "\n");
				749	for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
				750	Float Scaled = BFI.Freqs[Index].Floating * ScalingFactor;
				751	BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
				752	DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
				753	<< BFI.Freqs[Index].Floating << ", scaled = " << Scaled
				754	<< ", int = " << BFI.Freqs[Index].Integer << "\n");
				755	}
				756	}
				757
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	758	/// \brief Unwrap a loop package.
				759	///
				760	/// Visits all the members of a loop, adjusting their BlockData according to
				761	/// the loop's pseudo-node.
Duncan P. N. Exon Smith	0633f0e	2014-04-25 04:38:25 +0000	[diff] [blame]	762	static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
Duncan P. N. Exon Smith	5291d2a	2014-04-25 04:38:27 +0000	[diff] [blame]	763	DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getBlockName(Loop.getHeader())
Duncan P. N. Exon Smith	0633f0e	2014-04-25 04:38:25 +0000	[diff] [blame]	764	<< ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
				765	<< "\n");
Duncan P. N. Exon Smith	5291d2a	2014-04-25 04:38:27 +0000	[diff] [blame]	766	Loop.Scale *= Loop.Mass.toFloat();
				767	Loop.IsPackaged = false;
Duncan P. N. Exon Smith	3f08678	2014-04-25 04:38:32 +0000	[diff] [blame]	768	DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	769
				770	// Propagate the head scale through the loop. Since members are visited in
				771	// RPO, the head scale will be updated by the loop scale first, and then the
				772	// final head scale will be used for updated the rest of the members.
Duncan P. N. Exon Smith	5291d2a	2014-04-25 04:38:27 +0000	[diff] [blame]	773	for (const BlockNode &N : Loop.Nodes) {
				774	const auto &Working = BFI.Working[N.Index];
				775	Float &F = Working.isAPackage() ? BFI.getLoopPackage(N).Scale
				776	: BFI.Freqs[N.Index].Floating;
				777	Float New = Loop.Scale * F;
				778	DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
				779	<< "\n");
				780	F = New;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	781	}
				782	}
				783
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	784	void BlockFrequencyInfoImplBase::unwrapLoops() {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	785	// Set initial frequencies from loop-local masses.
				786	for (size_t Index = 0; Index < Working.size(); ++Index)
				787	Freqs[Index].Floating = Working[Index].Mass.toFloat();
				788
Duncan P. N. Exon Smith	da0b21c	2014-04-25 04:38:23 +0000	[diff] [blame]	789	for (LoopData &Loop : Loops)
Duncan P. N. Exon Smith	0633f0e	2014-04-25 04:38:25 +0000	[diff] [blame]	790	unwrapLoop(*this, Loop);
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	791	}
				792
				793	void BlockFrequencyInfoImplBase::finalizeMetrics() {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	794	// Unwrap loop packages in reverse post-order, tracking min and max
				795	// frequencies.
				796	auto Min = Float::getLargest();
				797	auto Max = Float::getZero();
				798	for (size_t Index = 0; Index < Working.size(); ++Index) {
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	799	// Update min/max scale.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	800	Min = std::min(Min, Freqs[Index].Floating);
				801	Max = std::max(Max, Freqs[Index].Floating);
				802	}
				803
				804	// Convert to integers.
				805	convertFloatingToInteger(*this, Min, Max);
				806
				807	// Clean up data structures.
				808	cleanup(*this);
				809
				810	// Print out the final stats.
				811	DEBUG(dump());
				812	}
				813
				814	BlockFrequency
				815	BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
				816	if (!Node.isValid())
				817	return 0;
				818	return Freqs[Node.Index].Integer;
				819	}
				820	Float
				821	BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
				822	if (!Node.isValid())
				823	return Float::getZero();
				824	return Freqs[Node.Index].Floating;
				825	}
				826
				827	std::string
				828	BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
				829	return std::string();
				830	}
				831
				832	raw_ostream &
				833	BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
				834	const BlockNode &Node) const {
				835	return OS << getFloatingBlockFreq(Node);
				836	}
				837
				838	raw_ostream &
				839	BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
				840	const BlockFrequency &Freq) const {
				841	Float Block(Freq.getFrequency(), 0);
				842	Float Entry(getEntryFreq(), 0);
				843
				844	return OS << Block / Entry;
				845	}