Blame - llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp - toolchain/llvm-project

blob: c78ed88ab6543343eb2ad440964e3f6664b00bca [file] [log] [blame]

Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	1	//===- BlockFrequencyInfoImpl.cpp - Block Frequency Info Implementation ---===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// Loops should be simplified before this analysis.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	14	#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
				15	#include "llvm/ADT/APFloat.h"
				16	#include "llvm/Support/raw_ostream.h"
				17	#include <deque>
				18
				19	using namespace llvm;
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	20	using namespace llvm::bfi_detail;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	21
Chandler Carruth	1b9dde0	2014-04-22 02:02:50 +0000	[diff] [blame]	22	#define DEBUG_TYPE "block-freq"
				23
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	24	//===----------------------------------------------------------------------===//
				25	//
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	26	// UnsignedFloat implementation.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	27	//
				28	//===----------------------------------------------------------------------===//
// Out-of-line definitions for the static constants MaxExponent and
// MinExponent of UnsignedFloatBase.
// NOTE(review): presumably these are skipped under MSVC because that compiler
// already treats the in-class declarations as definitions -- confirm.
#ifndef _MSC_VER
const int32_t UnsignedFloatBase::MaxExponent;
const int32_t UnsignedFloatBase::MinExponent;
#endif
				33
				34	static void appendDigit(std::string &Str, unsigned D) {
				35	assert(D < 10);
				36	Str += '0' + D % 10;
				37	}
				38
				39	static void appendNumber(std::string &Str, uint64_t N) {
				40	while (N) {
				41	appendDigit(Str, N % 10);
				42	N /= 10;
				43	}
				44	}
				45
				46	static bool doesRoundUp(char Digit) {
				47	switch (Digit) {
				48	case '5':
				49	case '6':
				50	case '7':
				51	case '8':
				52	case '9':
				53	return true;
				54	default:
				55	return false;
				56	}
				57	}
				58
				59	static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) {
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	60	assert(E >= UnsignedFloatBase::MinExponent);
				61	assert(E <= UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	62
				63	// Find a new E, but don't let it increase past MaxExponent.
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	64	int LeadingZeros = UnsignedFloatBase::countLeadingZeros64(D);
				65	int NewE = std::min(UnsignedFloatBase::MaxExponent, E + 63 - LeadingZeros);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	66	int Shift = 63 - (NewE - E);
				67	assert(Shift <= LeadingZeros);
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	68	assert(Shift == LeadingZeros \|\| NewE == UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	69	D <<= Shift;
				70	E = NewE;
				71
				72	// Check for a denormal.
				73	unsigned AdjustedE = E + 16383;
				74	if (!(D >> 63)) {
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	75	assert(E == UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	76	AdjustedE = 0;
				77	}
				78
				79	// Build the float and print it.
				80	uint64_t RawBits[2] = {D, AdjustedE};
				81	APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits));
				82	SmallVector<char, 24> Chars;
				83	Float.toString(Chars, Precision, 0);
				84	return std::string(Chars.begin(), Chars.end());
				85	}
				86
				87	static std::string stripTrailingZeros(const std::string &Float) {
				88	size_t NonZero = Float.find_last_not_of('0');
				89	assert(NonZero != std::string::npos && "no . in floating point string");
				90
				91	if (Float[NonZero] == '.')
				92	++NonZero;
				93
				94	return Float.substr(0, NonZero + 1);
				95	}
				96
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	97	std::string UnsignedFloatBase::toString(uint64_t D, int16_t E, int Width,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	98	unsigned Precision) {
				99	if (!D)
				100	return "0.0";
				101
				102	// Canonicalize exponent and digits.
				103	uint64_t Above0 = 0;
				104	uint64_t Below0 = 0;
				105	uint64_t Extra = 0;
				106	int ExtraShift = 0;
				107	if (E == 0) {
				108	Above0 = D;
				109	} else if (E > 0) {
				110	if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) {
				111	D <<= Shift;
				112	E -= Shift;
				113
				114	if (!E)
				115	Above0 = D;
				116	}
				117	} else if (E > -64) {
				118	Above0 = D >> -E;
				119	Below0 = D << (64 + E);
				120	} else if (E > -120) {
				121	Below0 = D >> (-E - 64);
				122	Extra = D << (128 + E);
				123	ExtraShift = -64 - E;
				124	}
				125
				126	// Fall back on APFloat for very small and very large numbers.
				127	if (!Above0 && !Below0)
				128	return toStringAPFloat(D, E, Precision);
				129
				130	// Append the digits before the decimal.
				131	std::string Str;
				132	size_t DigitsOut = 0;
				133	if (Above0) {
				134	appendNumber(Str, Above0);
				135	DigitsOut = Str.size();
				136	} else
				137	appendDigit(Str, 0);
				138	std::reverse(Str.begin(), Str.end());
				139
				140	// Return early if there's nothing after the decimal.
				141	if (!Below0)
				142	return Str + ".0";
				143
				144	// Append the decimal and beyond.
				145	Str += '.';
				146	uint64_t Error = UINT64_C(1) << (64 - Width);
				147
				148	// We need to shift Below0 to the right to make space for calculating
				149	// digits. Save the precision we're losing in Extra.
				150	Extra = (Below0 & 0xf) << 56 \| (Extra >> 8);
				151	Below0 >>= 4;
				152	size_t SinceDot = 0;
				153	size_t AfterDot = Str.size();
				154	do {
				155	if (ExtraShift) {
				156	--ExtraShift;
				157	Error *= 5;
				158	} else
				159	Error *= 10;
				160
				161	Below0 *= 10;
				162	Extra *= 10;
				163	Below0 += (Extra >> 60);
				164	Extra = Extra & (UINT64_MAX >> 4);
				165	appendDigit(Str, Below0 >> 60);
				166	Below0 = Below0 & (UINT64_MAX >> 4);
				167	if (DigitsOut \|\| Str.back() != '0')
				168	++DigitsOut;
				169	++SinceDot;
				170	} while (Error && (Below0 << 4 \| Extra >> 60) >= Error / 2 &&
				171	(!Precision \|\| DigitsOut <= Precision \|\| SinceDot < 2));
				172
				173	// Return early for maximum precision.
				174	if (!Precision \|\| DigitsOut <= Precision)
				175	return stripTrailingZeros(Str);
				176
				177	// Find where to truncate.
				178	size_t Truncate =
				179	std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1);
				180
				181	// Check if there's anything to truncate.
				182	if (Truncate >= Str.size())
				183	return stripTrailingZeros(Str);
				184
				185	bool Carry = doesRoundUp(Str[Truncate]);
				186	if (!Carry)
				187	return stripTrailingZeros(Str.substr(0, Truncate));
				188
				189	// Round with the first truncated digit.
				190	for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend();
				191	I != E; ++I) {
				192	if (*I == '.')
				193	continue;
				194	if (*I == '9') {
				195	*I = '0';
				196	continue;
				197	}
				198
				199	++*I;
				200	Carry = false;
				201	break;
				202	}
				203
				204	// Add "1" in front if we still need to carry.
				205	return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate));
				206	}
				207
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	208	raw_ostream &UnsignedFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	209	int Width, unsigned Precision) {
				210	return OS << toString(D, E, Width, Precision);
				211	}
				212
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	213	void UnsignedFloatBase::dump(uint64_t D, int16_t E, int Width) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	214	print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E
				215	<< "]";
				216	}
				217
				218	static std::pair<uint64_t, int16_t>
				219	getRoundedFloat(uint64_t N, bool ShouldRound, int64_t Shift) {
				220	if (ShouldRound)
				221	if (!++N)
				222	// Rounding caused an overflow.
				223	return std::make_pair(UINT64_C(1), Shift + 64);
				224	return std::make_pair(N, Shift);
				225	}
				226
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	227	std::pair<uint64_t, int16_t> UnsignedFloatBase::divide64(uint64_t Dividend,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	228	uint64_t Divisor) {
				229	// Input should be sanitized.
				230	assert(Divisor);
				231	assert(Dividend);
				232
				233	// Minimize size of divisor.
				234	int16_t Shift = 0;
				235	if (int Zeros = countTrailingZeros(Divisor)) {
				236	Shift -= Zeros;
				237	Divisor >>= Zeros;
				238	}
				239
				240	// Check for powers of two.
				241	if (Divisor == 1)
				242	return std::make_pair(Dividend, Shift);
				243
				244	// Maximize size of dividend.
				245	if (int Zeros = countLeadingZeros64(Dividend)) {
				246	Shift -= Zeros;
				247	Dividend <<= Zeros;
				248	}
				249
				250	// Start with the result of a divide.
				251	uint64_t Quotient = Dividend / Divisor;
				252	Dividend %= Divisor;
				253
				254	// Continue building the quotient with long division.
				255	//
				256	// TODO: continue with largers digits.
				257	while (!(Quotient >> 63) && Dividend) {
				258	// Shift Dividend, and check for overflow.
				259	bool IsOverflow = Dividend >> 63;
				260	Dividend <<= 1;
				261	--Shift;
				262
				263	// Divide.
				264	bool DoesDivide = IsOverflow \|\| Divisor <= Dividend;
				265	Quotient = (Quotient << 1) \| uint64_t(DoesDivide);
				266	Dividend -= DoesDivide ? Divisor : 0;
				267	}
				268
				269	// Round.
				270	if (Dividend >= getHalf(Divisor))
				271	if (!++Quotient)
				272	// Rounding caused an overflow in Quotient.
				273	return std::make_pair(UINT64_C(1), Shift + 64);
				274
				275	return getRoundedFloat(Quotient, Dividend >= getHalf(Divisor), Shift);
				276	}
				277
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	278	std::pair<uint64_t, int16_t> UnsignedFloatBase::multiply64(uint64_t L,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	279	uint64_t R) {
				280	// Separate into two 32-bit digits (U.L).
				281	uint64_t UL = L >> 32, LL = L & UINT32_MAX, UR = R >> 32, LR = R & UINT32_MAX;
				282
				283	// Compute cross products.
				284	uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR;
				285
				286	// Sum into two 64-bit digits.
				287	uint64_t Upper = P1, Lower = P4;
				288	auto addWithCarry = [&](uint64_t N) {
				289	uint64_t NewLower = Lower + (N << 32);
				290	Upper += (N >> 32) + (NewLower < Lower);
				291	Lower = NewLower;
				292	};
				293	addWithCarry(P2);
				294	addWithCarry(P3);
				295
				296	// Check whether the upper digit is empty.
				297	if (!Upper)
				298	return std::make_pair(Lower, 0);
				299
				300	// Shift as little as possible to maximize precision.
				301	unsigned LeadingZeros = countLeadingZeros64(Upper);
				302	int16_t Shift = 64 - LeadingZeros;
				303	if (LeadingZeros)
				304	Upper = Upper << LeadingZeros \| Lower >> Shift;
				305	bool ShouldRound = Shift && (Lower & UINT64_C(1) << (Shift - 1));
				306	return getRoundedFloat(Upper, ShouldRound, Shift);
				307	}
				308
				309	//===----------------------------------------------------------------------===//
				310	//
				311	// BlockMass implementation.
				312	//
				313	//===----------------------------------------------------------------------===//
				314	BlockMass &BlockMass::operator*=(const BranchProbability &P) {
				315	uint32_t N = P.getNumerator(), D = P.getDenominator();
				316	assert(D && "divide by 0");
				317	assert(N <= D && "fraction greater than 1");
				318
				319	// Fast path for multiplying by 1.0.
				320	if (!Mass \|\| N == D)
				321	return *this;
				322
				323	// Get as much precision as we can.
				324	int Shift = countLeadingZeros(Mass);
				325	uint64_t ShiftedQuotient = (Mass << Shift) / D;
				326	uint64_t Product = ShiftedQuotient * N >> Shift;
				327
				328	// Now check for what's lost.
				329	uint64_t Left = ShiftedQuotient * (D - N) >> Shift;
				330	uint64_t Lost = Mass - Product - Left;
				331
				332	// TODO: prove this assertion.
				333	assert(Lost <= UINT32_MAX);
				334
				335	// Take the product plus a portion of the spoils.
				336	Mass = Product + Lost * N / D;
				337	return *this;
				338	}
				339
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	340	UnsignedFloat<uint64_t> BlockMass::toFloat() const {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	341	if (isFull())
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	342	return UnsignedFloat<uint64_t>(1, 0);
				343	return UnsignedFloat<uint64_t>(getMass() + 1, -64);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	344	}
				345
				346	void BlockMass::dump() const { print(dbgs()); }
				347
				348	static char getHexDigit(int N) {
				349	assert(N < 16);
				350	if (N < 10)
				351	return '0' + N;
				352	return 'a' + N - 10;
				353	}
				354	raw_ostream &BlockMass::print(raw_ostream &OS) const {
				355	for (int Digits = 0; Digits < 16; ++Digits)
				356	OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
				357	return OS;
				358	}
				359
				360	//===----------------------------------------------------------------------===//
				361	//
				362	// BlockFrequencyInfoImpl implementation.
				363	//
				364	//===----------------------------------------------------------------------===//
				365	namespace {
				366
				367	typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
				368	typedef BlockFrequencyInfoImplBase::Distribution Distribution;
				369	typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
				370	typedef BlockFrequencyInfoImplBase::Float Float;
Duncan P. N. Exon Smith	cc88ebf	2014-04-22 03:31:31 +0000	[diff] [blame]	371	typedef BlockFrequencyInfoImplBase::LoopData LoopData;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	372	typedef BlockFrequencyInfoImplBase::Weight Weight;
				373	typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
				374
				375	/// \brief Dithering mass distributer.
				376	///
				377	/// This class splits up a single mass into portions by weight, dithering to
				378	/// spread out error. No mass is lost. The dithering precision depends on the
				379	/// precision of the product of \a BlockMass and \a BranchProbability.
				380	///
				381	/// The distribution algorithm follows.
				382	///
				383	/// 1. Initialize by saving the sum of the weights in \a RemWeight and the
				384	/// mass to distribute in \a RemMass.
				385	///
				386	/// 2. For each portion:
				387	///
				388	/// 1. Construct a branch probability, P, as the portion's weight divided
				389	/// by the current value of \a RemWeight.
				390	/// 2. Calculate the portion's mass as \a RemMass times P.
				391	/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
				392	/// the current portion's weight and mass.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	393	struct DitheringDistributer {
				394	uint32_t RemWeight;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	395	BlockMass RemMass;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	396
				397	DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
				398
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	399	BlockMass takeMass(uint32_t Weight);
				400	};
				401	}
				402
				403	DitheringDistributer::DitheringDistributer(Distribution &Dist,
				404	const BlockMass &Mass) {
				405	Dist.normalize();
				406	RemWeight = Dist.Total;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	407	RemMass = Mass;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	408	}
				409
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	410	BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
				411	assert(Weight && "invalid weight");
				412	assert(Weight <= RemWeight);
				413	BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
				414
				415	// Decrement totals (dither).
				416	RemWeight -= Weight;
				417	RemMass -= Mass;
				418	return Mass;
				419	}
				420
				421	void Distribution::add(const BlockNode &Node, uint64_t Amount,
				422	Weight::DistType Type) {
				423	assert(Amount && "invalid weight of 0");
				424	uint64_t NewTotal = Total + Amount;
				425
				426	// Check for overflow. It should be impossible to overflow twice.
				427	bool IsOverflow = NewTotal < Total;
				428	assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
				429	DidOverflow \|= IsOverflow;
				430
				431	// Update the total.
				432	Total = NewTotal;
				433
				434	// Save the weight.
				435	Weight W;
				436	W.TargetNode = Node;
				437	W.Amount = Amount;
				438	W.Type = Type;
				439	Weights.push_back(W);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	440	}
				441
				442	static void combineWeight(Weight &W, const Weight &OtherW) {
				443	assert(OtherW.TargetNode.isValid());
				444	if (!W.Amount) {
				445	W = OtherW;
				446	return;
				447	}
				448	assert(W.Type == OtherW.Type);
				449	assert(W.TargetNode == OtherW.TargetNode);
Duncan P. N. Exon Smith	ebf7626	2014-04-25 04:38:40 +0000	[diff] [blame]	450	assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	451	W.Amount += OtherW.Amount;
				452	}
				453	static void combineWeightsBySorting(WeightList &Weights) {
				454	// Sort so edges to the same node are adjacent.
				455	std::sort(Weights.begin(), Weights.end(),
				456	[](const Weight &L,
				457	const Weight &R) { return L.TargetNode < R.TargetNode; });
				458
				459	// Combine adjacent edges.
				460	WeightList::iterator O = Weights.begin();
				461	for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
				462	++O, (I = L)) {
				463	O = I;
				464
				465	// Find the adjacent weights to the same node.
				466	for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
				467	combineWeight(O, L);
				468	}
				469
				470	// Erase extra entries.
				471	Weights.erase(O, Weights.end());
				472	return;
				473	}
				474	static void combineWeightsByHashing(WeightList &Weights) {
				475	// Collect weights into a DenseMap.
				476	typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
				477	HashTable Combined(NextPowerOf2(2 * Weights.size()));
				478	for (const Weight &W : Weights)
				479	combineWeight(Combined[W.TargetNode.Index], W);
				480
				481	// Check whether anything changed.
				482	if (Weights.size() == Combined.size())
				483	return;
				484
				485	// Fill in the new weights.
				486	Weights.clear();
				487	Weights.reserve(Combined.size());
				488	for (const auto &I : Combined)
				489	Weights.push_back(I.second);
				490	}
				491	static void combineWeights(WeightList &Weights) {
				492	// Use a hash table for many successors to keep this linear.
				493	if (Weights.size() > 128) {
				494	combineWeightsByHashing(Weights);
				495	return;
				496	}
				497
				498	combineWeightsBySorting(Weights);
				499	}
				500	static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
				501	assert(Shift >= 0);
				502	assert(Shift < 64);
				503	if (!Shift)
				504	return N;
				505	return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
				506	}
				507	void Distribution::normalize() {
				508	// Early exit for termination nodes.
				509	if (Weights.empty())
				510	return;
				511
				512	// Only bother if there are multiple successors.
				513	if (Weights.size() > 1)
				514	combineWeights(Weights);
				515
				516	// Early exit when combined into a single successor.
				517	if (Weights.size() == 1) {
				518	Total = 1;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	519	Weights.front().Amount = 1;
				520	return;
				521	}
				522
				523	// Determine how much to shift right so that the total fits into 32-bits.
				524	//
				525	// If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
				526	// for each weight can cause a 32-bit overflow.
				527	int Shift = 0;
				528	if (DidOverflow)
				529	Shift = 33;
				530	else if (Total > UINT32_MAX)
				531	Shift = 33 - countLeadingZeros(Total);
				532
				533	// Early exit if nothing needs to be scaled.
				534	if (!Shift)
				535	return;
				536
				537	// Recompute the total through accumulation (rather than shifting it) so that
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	538	// it's accurate after shifting.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	539	Total = 0;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	540
				541	// Sum the weights to each node and shift right if necessary.
				542	for (Weight &W : Weights) {
				543	// Scale down below UINT32_MAX. Since Shift is larger than necessary, we
				544	// can round here without concern about overflow.
				545	assert(W.TargetNode.isValid());
				546	W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
				547	assert(W.Amount <= UINT32_MAX);
				548
				549	// Update the total.
				550	Total += W.Amount;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	551	}
				552	assert(Total <= UINT32_MAX);
				553	}
				554
				555	void BlockFrequencyInfoImplBase::clear() {
Duncan P. N. Exon Smith	dc2d66e	2014-04-22 03:31:34 +0000	[diff] [blame]	556	// Swap with a default-constructed std::vector, since std::vector<>::clear()
				557	// does not actually clear heap storage.
				558	std::vector<FrequencyData>().swap(Freqs);
				559	std::vector<WorkingData>().swap(Working);
Duncan P. N. Exon Smith	fc7dc93	2014-04-25 04:30:06 +0000	[diff] [blame]	560	Loops.clear();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	561	}
				562
				563	/// \brief Clear all memory not needed downstream.
				564	///
				565	/// Releases all memory not used downstream. In particular, saves Freqs.
				566	static void cleanup(BlockFrequencyInfoImplBase &BFI) {
				567	std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
				568	BFI.clear();
				569	BFI.Freqs = std::move(SavedFreqs);
				570	}
				571
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	572	bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	573	const LoopData *OuterLoop,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	574	const BlockNode &Pred,
				575	const BlockNode &Succ,
				576	uint64_t Weight) {
				577	if (!Weight)
				578	Weight = 1;
				579
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	580	auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
				581	return OuterLoop && OuterLoop->isHeader(Node);
				582	};
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	583
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	584	BlockNode Resolved = Working[Succ.Index].getResolvedNode();
				585
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	586	#ifndef NDEBUG
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	587	auto debugSuccessor = [&](const char *Type) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	588	dbgs() << " =>"
				589	<< " [" << Type << "] weight = " << Weight;
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	590	if (!isLoopHeader(Resolved))
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	591	dbgs() << ", succ = " << getBlockName(Succ);
				592	if (Resolved != Succ)
				593	dbgs() << ", resolved = " << getBlockName(Resolved);
				594	dbgs() << "\n";
				595	};
				596	(void)debugSuccessor;
				597	#endif
				598
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	599	if (isLoopHeader(Resolved)) {
				600	DEBUG(debugSuccessor("backedge"));
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	601	Dist.addBackedge(OuterLoop->getHeader(), Weight);
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	602	return true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	603	}
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	604
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	605	if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	606	DEBUG(debugSuccessor(" exit "));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	607	Dist.addExit(Resolved, Weight);
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	608	return true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	609	}
				610
Duncan P. N. Exon Smith	b3380ea	2014-04-22 03:31:53 +0000	[diff] [blame]	611	if (Resolved < Pred) {
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	612	if (!isLoopHeader(Pred)) {
				613	// If OuterLoop is an irreducible loop, we can't actually handle this.
				614	assert((!OuterLoop \|\| !OuterLoop->isIrreducible()) &&
				615	"unhandled irreducible control flow");
				616
				617	// Irreducible backedge. Abort.
				618	DEBUG(debugSuccessor("abort!!!"));
				619	return false;
				620	}
				621
				622	// If "Pred" is a loop header, then this isn't really a backedge; rather,
				623	// OuterLoop must be irreducible. These false backedges can come only from
				624	// secondary loop headers.
				625	assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
				626	"unhandled irreducible control flow");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	627	}
				628
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	629	DEBUG(debugSuccessor(" local "));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	630	Dist.addLocal(Resolved, Weight);
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	631	return true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	632	}
				633
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	634	bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	635	const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	636	// Copy the exit map into Dist.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	637	for (const auto &I : Loop.Exits)
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	638	if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
				639	I.second.getMass()))
				640	// Irreducible backedge.
				641	return false;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	642
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	643	return true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	644	}
				645
				646	/// \brief Get the maximum allowed loop scale.
				647	///
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	648	/// Gives the maximum number of estimated iterations allowed for a loop. Very
				649	/// large numbers cause problems downstream (even within 64-bits).
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	650	static Float getMaxLoopScale() { return Float(1, 12); }
				651
				652	/// \brief Compute the loop scale for a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	653	void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	654	// Compute loop scale.
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	655	DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	656
				657	// LoopScale == 1 / ExitMass
				658	// ExitMass == HeadMass - BackedgeMass
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	659	BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	660
				661	// Block scale stores the inverse of the scale.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	662	Loop.Scale = ExitMass.toFloat().inverse();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	663
				664	DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	665	<< " - " << Loop.BackedgeMass << ")\n"
				666	<< " - scale = " << Loop.Scale << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	667
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	668	if (Loop.Scale > getMaxLoopScale()) {
				669	Loop.Scale = getMaxLoopScale();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	670	DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
				671	}
				672	}
				673
				674	/// \brief Package up a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	675	void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	676	DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
				677
				678	// Clear the subloop exits to prevent quadratic memory usage.
				679	for (const BlockNode &M : Loop.Nodes) {
				680	if (auto *Loop = Working[M.Index].getPackagedLoop())
				681	Loop->Exits.clear();
				682	DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
				683	}
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	684	Loop.IsPackaged = true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	685	}
				686
				687	void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	688	LoopData *OuterLoop,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	689	Distribution &Dist) {
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	690	BlockMass Mass = Working[Source.Index].getMass();
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	691	DEBUG(dbgs() << " => mass: " << Mass << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	692
				693	// Distribute mass to successors as laid out in Dist.
				694	DitheringDistributer D(Dist, Mass);
				695
				696	#ifndef NDEBUG
				697	auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
				698	const char *Desc) {
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	699	dbgs() << " => assign " << M << " (" << D.RemMass << ")";
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	700	if (Desc)
				701	dbgs() << " [" << Desc << "]";
				702	if (T.isValid())
				703	dbgs() << " to " << getBlockName(T);
				704	dbgs() << "\n";
				705	};
				706	(void)debugAssign;
				707	#endif
				708
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	709	for (const Weight &W : Dist.Weights) {
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	710	// Check for a local edge (non-backedge and non-exit).
				711	BlockMass Taken = D.takeMass(W.Amount);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	712	if (W.Type == Weight::Local) {
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	713	Working[W.TargetNode.Index].getMass() += Taken;
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	714	DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	715	continue;
				716	}
				717
				718	// Backedges and exits only make sense if we're processing a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	719	assert(OuterLoop && "backedge or exit outside of loop");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	720
				721	// Check for a backedge.
				722	if (W.Type == Weight::Backedge) {
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	723	OuterLoop->BackedgeMass += Taken;
				724	DEBUG(debugAssign(BlockNode(), Taken, "back"));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	725	continue;
				726	}
				727
				728	// This must be an exit.
				729	assert(W.Type == Weight::Exit);
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	730	OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
				731	DEBUG(debugAssign(W.TargetNode, Taken, "exit"));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	732	}
				733	}
				734
				735	static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
				736	const Float &Min, const Float &Max) {
				737	// Scale the Factor to a size that creates integers. Ideally, integers would
				738	// be scaled so that Max == UINT64_MAX so that they can be best
				739	// differentiated. However, the register allocator currently deals poorly
				740	// with large numbers. Instead, push Min up a little from 1 to give some
				741	// room to differentiate small, unequal numbers.
				742	//
				743	// TODO: fix issues downstream so that ScalingFactor can be Float(1,64)/Max.
				744	Float ScalingFactor = Min.inverse();
				745	if ((Max / Min).lg() < 60)
				746	ScalingFactor <<= 3;
				747
				748	// Translate the floats to integers.
				749	DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
				750	<< ", factor = " << ScalingFactor << "\n");
				751	for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
				752	Float Scaled = BFI.Freqs[Index].Floating * ScalingFactor;
				753	BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
				754	DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
				755	<< BFI.Freqs[Index].Floating << ", scaled = " << Scaled
				756	<< ", int = " << BFI.Freqs[Index].Integer << "\n");
				757	}
				758	}
				759
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	760	/// \brief Unwrap a loop package.
				761	///
				762	/// Visits all the members of a loop, adjusting their BlockData according to
				763	/// the loop's pseudo-node.
Duncan P. N. Exon Smith	0633f0e	2014-04-25 04:38:25 +0000	[diff] [blame]	764	static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	765	DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
Duncan P. N. Exon Smith	0633f0e	2014-04-25 04:38:25 +0000	[diff] [blame]	766	<< ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
				767	<< "\n");
Duncan P. N. Exon Smith	5291d2a	2014-04-25 04:38:27 +0000	[diff] [blame]	768	Loop.Scale *= Loop.Mass.toFloat();
				769	Loop.IsPackaged = false;
Duncan P. N. Exon Smith	3f08678	2014-04-25 04:38:32 +0000	[diff] [blame]	770	DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	771
				772	// Propagate the head scale through the loop. Since members are visited in
				773	// RPO, the head scale will be updated by the loop scale first, and then the
				774	// final head scale will be used for updated the rest of the members.
Duncan P. N. Exon Smith	5291d2a	2014-04-25 04:38:27 +0000	[diff] [blame]	775	for (const BlockNode &N : Loop.Nodes) {
				776	const auto &Working = BFI.Working[N.Index];
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	777	Float &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
Duncan P. N. Exon Smith	5291d2a	2014-04-25 04:38:27 +0000	[diff] [blame]	778	: BFI.Freqs[N.Index].Floating;
				779	Float New = Loop.Scale * F;
				780	DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
				781	<< "\n");
				782	F = New;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	783	}
				784	}
				785
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	786	void BlockFrequencyInfoImplBase::unwrapLoops() {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	787	// Set initial frequencies from loop-local masses.
				788	for (size_t Index = 0; Index < Working.size(); ++Index)
				789	Freqs[Index].Floating = Working[Index].Mass.toFloat();
				790
Duncan P. N. Exon Smith	da0b21c	2014-04-25 04:38:23 +0000	[diff] [blame]	791	for (LoopData &Loop : Loops)
Duncan P. N. Exon Smith	0633f0e	2014-04-25 04:38:25 +0000	[diff] [blame]	792	unwrapLoop(*this, Loop);
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	793	}
				794
				795	void BlockFrequencyInfoImplBase::finalizeMetrics() {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	796	// Unwrap loop packages in reverse post-order, tracking min and max
				797	// frequencies.
				798	auto Min = Float::getLargest();
				799	auto Max = Float::getZero();
				800	for (size_t Index = 0; Index < Working.size(); ++Index) {
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	801	// Update min/max scale.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	802	Min = std::min(Min, Freqs[Index].Floating);
				803	Max = std::max(Max, Freqs[Index].Floating);
				804	}
				805
				806	// Convert to integers.
				807	convertFloatingToInteger(*this, Min, Max);
				808
				809	// Clean up data structures.
				810	cleanup(*this);
				811
				812	// Print out the final stats.
				813	DEBUG(dump());
				814	}
				815
				816	BlockFrequency
				817	BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
				818	if (!Node.isValid())
				819	return 0;
				820	return Freqs[Node.Index].Integer;
				821	}
				822	Float
				823	BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
				824	if (!Node.isValid())
				825	return Float::getZero();
				826	return Freqs[Node.Index].Floating;
				827	}
				828
				829	std::string
				830	BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
				831	return std::string();
				832	}
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	833	std::string
				834	BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
				835	return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "*" : "");
				836	}
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	837
				838	raw_ostream &
				839	BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
				840	const BlockNode &Node) const {
				841	return OS << getFloatingBlockFreq(Node);
				842	}
				843
				844	raw_ostream &
				845	BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
				846	const BlockFrequency &Freq) const {
				847	Float Block(Freq.getFrequency(), 0);
				848	Float Entry(getEntryFreq(), 0);
				849
				850	return OS << Block / Entry;
				851	}
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	852
				853	void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
				854	Start = OuterLoop.getHeader();
				855	Nodes.reserve(OuterLoop.Nodes.size());
				856	for (auto N : OuterLoop.Nodes)
				857	addNode(N);
				858	indexNodes();
				859	}
				860	void IrreducibleGraph::addNodesInFunction() {
				861	Start = 0;
				862	for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
				863	if (!BFI.Working[Index].isPackaged())
				864	addNode(Index);
				865	indexNodes();
				866	}
				867	void IrreducibleGraph::indexNodes() {
				868	for (auto &I : Nodes)
				869	Lookup[I.Node.Index] = &I;
				870	}
				871	void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
				872	const BFIBase::LoopData *OuterLoop) {
				873	if (OuterLoop && OuterLoop->isHeader(Succ))
				874	return;
				875	auto L = Lookup.find(Succ.Index);
				876	if (L == Lookup.end())
				877	return;
				878	IrrNode &SuccIrr = *L->second;
				879	Irr.Edges.push_back(&SuccIrr);
				880	SuccIrr.Edges.push_front(&Irr);
				881	++SuccIrr.NumIn;
				882	}
				883
				884	namespace llvm {
				885	template <> struct GraphTraits<IrreducibleGraph> {
				886	typedef bfi_detail::IrreducibleGraph GraphT;
				887
Duncan P. N. Exon Smith	295b5e7	2014-04-28 20:22:29 +0000	[diff] [blame^]	888	typedef const GraphT::IrrNode NodeType;
				889	typedef GraphT::IrrNode::iterator ChildIteratorType;
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	890
				891	static const NodeType *getEntryNode(const GraphT &G) {
				892	return G.StartIrr;
				893	}
				894	static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
				895	static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
				896	};
				897	}
				898
				899	/// \brief Find extra irreducible headers.
				900	///
				901	/// Find entry blocks and other blocks with backedges, which exist when \c G
				902	/// contains irreducible sub-SCCs.
				903	static void findIrreducibleHeaders(
				904	const BlockFrequencyInfoImplBase &BFI,
				905	const IrreducibleGraph &G,
				906	const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
				907	LoopData::NodeList &Headers, LoopData::NodeList &Others) {
				908	// Map from nodes in the SCC to whether it's an entry block.
				909	SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
				910
				911	// InSCC also acts the set of nodes in the graph. Seed it.
				912	for (const auto *I : SCC)
				913	InSCC[I] = false;
				914
				915	for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
				916	auto &Irr = *I->first;
				917	for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
				918	if (InSCC.count(P))
				919	continue;
				920
				921	// This is an entry block.
				922	I->second = true;
				923	Headers.push_back(Irr.Node);
				924	DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n");
				925	break;
				926	}
				927	}
				928	assert(Headers.size() >= 2 && "Should be irreducible");
				929	if (Headers.size() == InSCC.size()) {
				930	// Every block is a header.
				931	std::sort(Headers.begin(), Headers.end());
				932	return;
				933	}
				934
				935	// Look for extra headers from irreducible sub-SCCs.
				936	for (const auto &I : InSCC) {
				937	// Entry blocks are already headers.
				938	if (I.second)
				939	continue;
				940
				941	auto &Irr = *I.first;
				942	for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
				943	// Skip forward edges.
				944	if (P->Node < Irr.Node)
				945	continue;
				946
				947	// Skip predecessors from entry blocks. These can have inverted
				948	// ordering.
				949	if (InSCC.lookup(P))
				950	continue;
				951
				952	// Store the extra header.
				953	Headers.push_back(Irr.Node);
				954	DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n");
				955	break;
				956	}
				957	if (Headers.back() == Irr.Node)
				958	// Added this as a header.
				959	continue;
				960
				961	// This is not a header.
				962	Others.push_back(Irr.Node);
				963	DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
				964	}
				965	std::sort(Headers.begin(), Headers.end());
				966	std::sort(Others.begin(), Others.end());
				967	}
				968
				969	static void createIrreducibleLoop(
				970	BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
				971	LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
				972	const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
				973	// Translate the SCC into RPO.
				974	DEBUG(dbgs() << " - found-scc\n");
				975
				976	LoopData::NodeList Headers;
				977	LoopData::NodeList Others;
				978	findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
				979
				980	auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
				981	Headers.end(), Others.begin(), Others.end());
				982
				983	// Update loop hierarchy.
				984	for (const auto &N : Loop->Nodes)
				985	if (BFI.Working[N.Index].isLoopHeader())
				986	BFI.Working[N.Index].Loop->Parent = &*Loop;
				987	else
				988	BFI.Working[N.Index].Loop = &*Loop;
				989	}
				990
				991	iterator_range<std::list<LoopData>::iterator>
				992	BlockFrequencyInfoImplBase::analyzeIrreducible(
				993	const IrreducibleGraph &G, LoopData *OuterLoop,
				994	std::list<LoopData>::iterator Insert) {
				995	assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
				996	auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
				997
				998	for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
				999	if (I->size() < 2)
				1000	continue;
				1001
				1002	// Translate the SCC into RPO.
				1003	createIrreducibleLoop(this, G, OuterLoop, Insert, I);
				1004	}
				1005
				1006	if (OuterLoop)
				1007	return make_range(std::next(Prev), Insert);
				1008	return make_range(Loops.begin(), Insert);
				1009	}
				1010
				1011	void
				1012	BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
				1013	OuterLoop.Exits.clear();
				1014	OuterLoop.BackedgeMass = BlockMass::getEmpty();
				1015	auto O = OuterLoop.Nodes.begin() + 1;
				1016	for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
				1017	if (!Working[I->Index].isPackaged())
				1018	O++ = I;
				1019	OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
				1020	}