Blame - llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp - toolchain/llvm-project

blob: 87d93a4bd5f91316c235928a9ab118dbed3f1f02 [file] [log] [blame]

Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	1	//===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// Loops should be simplified before this analysis.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	14	#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
				15	#include "llvm/ADT/APFloat.h"
Duncan P. N. Exon Smith	87c40fd	2014-05-06 01:57:42 +0000	[diff] [blame^]	16	#include "llvm/ADT/SCCIterator.h"
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	17	#include "llvm/Support/raw_ostream.h"
				18	#include <deque>
				19
				20	using namespace llvm;
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	21	using namespace llvm::bfi_detail;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	22
Chandler Carruth	1b9dde0	2014-04-22 02:02:50 +0000	[diff] [blame]	23	#define DEBUG_TYPE "block-freq"
				24
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	25	//===----------------------------------------------------------------------===//
				26	//
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	27	// UnsignedFloat implementation.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	28	//
				29	//===----------------------------------------------------------------------===//
				30	#ifndef _MSC_VER
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	31	const int32_t UnsignedFloatBase::MaxExponent;
				32	const int32_t UnsignedFloatBase::MinExponent;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	33	#endif
				34
				35	static void appendDigit(std::string &Str, unsigned D) {
				36	assert(D < 10);
				37	Str += '0' + D % 10;
				38	}
				39
				40	static void appendNumber(std::string &Str, uint64_t N) {
				41	while (N) {
				42	appendDigit(Str, N % 10);
				43	N /= 10;
				44	}
				45	}
				46
				47	static bool doesRoundUp(char Digit) {
				48	switch (Digit) {
				49	case '5':
				50	case '6':
				51	case '7':
				52	case '8':
				53	case '9':
				54	return true;
				55	default:
				56	return false;
				57	}
				58	}
				59
				60	static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) {
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	61	assert(E >= UnsignedFloatBase::MinExponent);
				62	assert(E <= UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	63
				64	// Find a new E, but don't let it increase past MaxExponent.
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	65	int LeadingZeros = UnsignedFloatBase::countLeadingZeros64(D);
				66	int NewE = std::min(UnsignedFloatBase::MaxExponent, E + 63 - LeadingZeros);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	67	int Shift = 63 - (NewE - E);
				68	assert(Shift <= LeadingZeros);
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	69	assert(Shift == LeadingZeros \|\| NewE == UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	70	D <<= Shift;
				71	E = NewE;
				72
				73	// Check for a denormal.
				74	unsigned AdjustedE = E + 16383;
				75	if (!(D >> 63)) {
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	76	assert(E == UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	77	AdjustedE = 0;
				78	}
				79
				80	// Build the float and print it.
				81	uint64_t RawBits[2] = {D, AdjustedE};
				82	APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits));
				83	SmallVector<char, 24> Chars;
				84	Float.toString(Chars, Precision, 0);
				85	return std::string(Chars.begin(), Chars.end());
				86	}
				87
				88	static std::string stripTrailingZeros(const std::string &Float) {
				89	size_t NonZero = Float.find_last_not_of('0');
				90	assert(NonZero != std::string::npos && "no . in floating point string");
				91
				92	if (Float[NonZero] == '.')
				93	++NonZero;
				94
				95	return Float.substr(0, NonZero + 1);
				96	}
				97
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	98	std::string UnsignedFloatBase::toString(uint64_t D, int16_t E, int Width,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	99	unsigned Precision) {
				100	if (!D)
				101	return "0.0";
				102
				103	// Canonicalize exponent and digits.
				104	uint64_t Above0 = 0;
				105	uint64_t Below0 = 0;
				106	uint64_t Extra = 0;
				107	int ExtraShift = 0;
				108	if (E == 0) {
				109	Above0 = D;
				110	} else if (E > 0) {
				111	if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) {
				112	D <<= Shift;
				113	E -= Shift;
				114
				115	if (!E)
				116	Above0 = D;
				117	}
				118	} else if (E > -64) {
				119	Above0 = D >> -E;
				120	Below0 = D << (64 + E);
				121	} else if (E > -120) {
				122	Below0 = D >> (-E - 64);
				123	Extra = D << (128 + E);
				124	ExtraShift = -64 - E;
				125	}
				126
				127	// Fall back on APFloat for very small and very large numbers.
				128	if (!Above0 && !Below0)
				129	return toStringAPFloat(D, E, Precision);
				130
				131	// Append the digits before the decimal.
				132	std::string Str;
				133	size_t DigitsOut = 0;
				134	if (Above0) {
				135	appendNumber(Str, Above0);
				136	DigitsOut = Str.size();
				137	} else
				138	appendDigit(Str, 0);
				139	std::reverse(Str.begin(), Str.end());
				140
				141	// Return early if there's nothing after the decimal.
				142	if (!Below0)
				143	return Str + ".0";
				144
				145	// Append the decimal and beyond.
				146	Str += '.';
				147	uint64_t Error = UINT64_C(1) << (64 - Width);
				148
				149	// We need to shift Below0 to the right to make space for calculating
				150	// digits. Save the precision we're losing in Extra.
				151	Extra = (Below0 & 0xf) << 56 \| (Extra >> 8);
				152	Below0 >>= 4;
				153	size_t SinceDot = 0;
				154	size_t AfterDot = Str.size();
				155	do {
				156	if (ExtraShift) {
				157	--ExtraShift;
				158	Error *= 5;
				159	} else
				160	Error *= 10;
				161
				162	Below0 *= 10;
				163	Extra *= 10;
				164	Below0 += (Extra >> 60);
				165	Extra = Extra & (UINT64_MAX >> 4);
				166	appendDigit(Str, Below0 >> 60);
				167	Below0 = Below0 & (UINT64_MAX >> 4);
				168	if (DigitsOut \|\| Str.back() != '0')
				169	++DigitsOut;
				170	++SinceDot;
				171	} while (Error && (Below0 << 4 \| Extra >> 60) >= Error / 2 &&
				172	(!Precision \|\| DigitsOut <= Precision \|\| SinceDot < 2));
				173
				174	// Return early for maximum precision.
				175	if (!Precision \|\| DigitsOut <= Precision)
				176	return stripTrailingZeros(Str);
				177
				178	// Find where to truncate.
				179	size_t Truncate =
				180	std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1);
				181
				182	// Check if there's anything to truncate.
				183	if (Truncate >= Str.size())
				184	return stripTrailingZeros(Str);
				185
				186	bool Carry = doesRoundUp(Str[Truncate]);
				187	if (!Carry)
				188	return stripTrailingZeros(Str.substr(0, Truncate));
				189
				190	// Round with the first truncated digit.
				191	for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend();
				192	I != E; ++I) {
				193	if (*I == '.')
				194	continue;
				195	if (*I == '9') {
				196	*I = '0';
				197	continue;
				198	}
				199
				200	++*I;
				201	Carry = false;
				202	break;
				203	}
				204
				205	// Add "1" in front if we still need to carry.
				206	return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate));
				207	}
				208
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	209	raw_ostream &UnsignedFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	210	int Width, unsigned Precision) {
				211	return OS << toString(D, E, Width, Precision);
				212	}
				213
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	214	void UnsignedFloatBase::dump(uint64_t D, int16_t E, int Width) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	215	print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E
				216	<< "]";
				217	}
				218
				219	static std::pair<uint64_t, int16_t>
				220	getRoundedFloat(uint64_t N, bool ShouldRound, int64_t Shift) {
				221	if (ShouldRound)
				222	if (!++N)
				223	// Rounding caused an overflow.
				224	return std::make_pair(UINT64_C(1), Shift + 64);
				225	return std::make_pair(N, Shift);
				226	}
				227
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	228	std::pair<uint64_t, int16_t> UnsignedFloatBase::divide64(uint64_t Dividend,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	229	uint64_t Divisor) {
				230	// Input should be sanitized.
				231	assert(Divisor);
				232	assert(Dividend);
				233
				234	// Minimize size of divisor.
				235	int16_t Shift = 0;
				236	if (int Zeros = countTrailingZeros(Divisor)) {
				237	Shift -= Zeros;
				238	Divisor >>= Zeros;
				239	}
				240
				241	// Check for powers of two.
				242	if (Divisor == 1)
				243	return std::make_pair(Dividend, Shift);
				244
				245	// Maximize size of dividend.
				246	if (int Zeros = countLeadingZeros64(Dividend)) {
				247	Shift -= Zeros;
				248	Dividend <<= Zeros;
				249	}
				250
				251	// Start with the result of a divide.
				252	uint64_t Quotient = Dividend / Divisor;
				253	Dividend %= Divisor;
				254
				255	// Continue building the quotient with long division.
				256	//
				257	// TODO: continue with largers digits.
				258	while (!(Quotient >> 63) && Dividend) {
				259	// Shift Dividend, and check for overflow.
				260	bool IsOverflow = Dividend >> 63;
				261	Dividend <<= 1;
				262	--Shift;
				263
				264	// Divide.
				265	bool DoesDivide = IsOverflow \|\| Divisor <= Dividend;
				266	Quotient = (Quotient << 1) \| uint64_t(DoesDivide);
				267	Dividend -= DoesDivide ? Divisor : 0;
				268	}
				269
				270	// Round.
				271	if (Dividend >= getHalf(Divisor))
				272	if (!++Quotient)
				273	// Rounding caused an overflow in Quotient.
				274	return std::make_pair(UINT64_C(1), Shift + 64);
				275
				276	return getRoundedFloat(Quotient, Dividend >= getHalf(Divisor), Shift);
				277	}
				278
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	279	std::pair<uint64_t, int16_t> UnsignedFloatBase::multiply64(uint64_t L,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	280	uint64_t R) {
				281	// Separate into two 32-bit digits (U.L).
				282	uint64_t UL = L >> 32, LL = L & UINT32_MAX, UR = R >> 32, LR = R & UINT32_MAX;
				283
				284	// Compute cross products.
				285	uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR;
				286
				287	// Sum into two 64-bit digits.
				288	uint64_t Upper = P1, Lower = P4;
				289	auto addWithCarry = [&](uint64_t N) {
				290	uint64_t NewLower = Lower + (N << 32);
				291	Upper += (N >> 32) + (NewLower < Lower);
				292	Lower = NewLower;
				293	};
				294	addWithCarry(P2);
				295	addWithCarry(P3);
				296
				297	// Check whether the upper digit is empty.
				298	if (!Upper)
				299	return std::make_pair(Lower, 0);
				300
				301	// Shift as little as possible to maximize precision.
				302	unsigned LeadingZeros = countLeadingZeros64(Upper);
				303	int16_t Shift = 64 - LeadingZeros;
				304	if (LeadingZeros)
				305	Upper = Upper << LeadingZeros \| Lower >> Shift;
				306	bool ShouldRound = Shift && (Lower & UINT64_C(1) << (Shift - 1));
				307	return getRoundedFloat(Upper, ShouldRound, Shift);
				308	}
				309
				310	//===----------------------------------------------------------------------===//
				311	//
				312	// BlockMass implementation.
				313	//
				314	//===----------------------------------------------------------------------===//
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	315	UnsignedFloat<uint64_t> BlockMass::toFloat() const {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	316	if (isFull())
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	317	return UnsignedFloat<uint64_t>(1, 0);
				318	return UnsignedFloat<uint64_t>(getMass() + 1, -64);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	319	}
				320
				321	void BlockMass::dump() const { print(dbgs()); }
				322
				323	static char getHexDigit(int N) {
				324	assert(N < 16);
				325	if (N < 10)
				326	return '0' + N;
				327	return 'a' + N - 10;
				328	}
				329	raw_ostream &BlockMass::print(raw_ostream &OS) const {
				330	for (int Digits = 0; Digits < 16; ++Digits)
				331	OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
				332	return OS;
				333	}
				334
				335	//===----------------------------------------------------------------------===//
				336	//
				337	// BlockFrequencyInfoImpl implementation.
				338	//
				339	//===----------------------------------------------------------------------===//
				340	namespace {
				341
				342	typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
				343	typedef BlockFrequencyInfoImplBase::Distribution Distribution;
				344	typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
				345	typedef BlockFrequencyInfoImplBase::Float Float;
Duncan P. N. Exon Smith	cc88ebf	2014-04-22 03:31:31 +0000	[diff] [blame]	346	typedef BlockFrequencyInfoImplBase::LoopData LoopData;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	347	typedef BlockFrequencyInfoImplBase::Weight Weight;
				348	typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
				349
				350	/// \brief Dithering mass distributer.
				351	///
				352	/// This class splits up a single mass into portions by weight, dithering to
				353	/// spread out error. No mass is lost. The dithering precision depends on the
				354	/// precision of the product of \a BlockMass and \a BranchProbability.
				355	///
				356	/// The distribution algorithm follows.
				357	///
				358	/// 1. Initialize by saving the sum of the weights in \a RemWeight and the
				359	/// mass to distribute in \a RemMass.
				360	///
				361	/// 2. For each portion:
				362	///
				363	/// 1. Construct a branch probability, P, as the portion's weight divided
				364	/// by the current value of \a RemWeight.
				365	/// 2. Calculate the portion's mass as \a RemMass times P.
				366	/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
				367	/// the current portion's weight and mass.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	368	struct DitheringDistributer {
				369	uint32_t RemWeight;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	370	BlockMass RemMass;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	371
				372	DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
				373
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	374	BlockMass takeMass(uint32_t Weight);
				375	};
				376	}
				377
				378	DitheringDistributer::DitheringDistributer(Distribution &Dist,
				379	const BlockMass &Mass) {
				380	Dist.normalize();
				381	RemWeight = Dist.Total;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	382	RemMass = Mass;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	383	}
				384
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	385	BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
				386	assert(Weight && "invalid weight");
				387	assert(Weight <= RemWeight);
				388	BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
				389
				390	// Decrement totals (dither).
				391	RemWeight -= Weight;
				392	RemMass -= Mass;
				393	return Mass;
				394	}
				395
				396	void Distribution::add(const BlockNode &Node, uint64_t Amount,
				397	Weight::DistType Type) {
				398	assert(Amount && "invalid weight of 0");
				399	uint64_t NewTotal = Total + Amount;
				400
				401	// Check for overflow. It should be impossible to overflow twice.
				402	bool IsOverflow = NewTotal < Total;
				403	assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
				404	DidOverflow \|= IsOverflow;
				405
				406	// Update the total.
				407	Total = NewTotal;
				408
				409	// Save the weight.
				410	Weight W;
				411	W.TargetNode = Node;
				412	W.Amount = Amount;
				413	W.Type = Type;
				414	Weights.push_back(W);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	415	}
				416
				417	static void combineWeight(Weight &W, const Weight &OtherW) {
				418	assert(OtherW.TargetNode.isValid());
				419	if (!W.Amount) {
				420	W = OtherW;
				421	return;
				422	}
				423	assert(W.Type == OtherW.Type);
				424	assert(W.TargetNode == OtherW.TargetNode);
Duncan P. N. Exon Smith	ebf7626	2014-04-25 04:38:40 +0000	[diff] [blame]	425	assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	426	W.Amount += OtherW.Amount;
				427	}
				428	static void combineWeightsBySorting(WeightList &Weights) {
				429	// Sort so edges to the same node are adjacent.
				430	std::sort(Weights.begin(), Weights.end(),
				431	[](const Weight &L,
				432	const Weight &R) { return L.TargetNode < R.TargetNode; });
				433
				434	// Combine adjacent edges.
				435	WeightList::iterator O = Weights.begin();
				436	for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
				437	++O, (I = L)) {
				438	O = I;
				439
				440	// Find the adjacent weights to the same node.
				441	for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
				442	combineWeight(O, L);
				443	}
				444
				445	// Erase extra entries.
				446	Weights.erase(O, Weights.end());
				447	return;
				448	}
				449	static void combineWeightsByHashing(WeightList &Weights) {
				450	// Collect weights into a DenseMap.
				451	typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
				452	HashTable Combined(NextPowerOf2(2 * Weights.size()));
				453	for (const Weight &W : Weights)
				454	combineWeight(Combined[W.TargetNode.Index], W);
				455
				456	// Check whether anything changed.
				457	if (Weights.size() == Combined.size())
				458	return;
				459
				460	// Fill in the new weights.
				461	Weights.clear();
				462	Weights.reserve(Combined.size());
				463	for (const auto &I : Combined)
				464	Weights.push_back(I.second);
				465	}
				466	static void combineWeights(WeightList &Weights) {
				467	// Use a hash table for many successors to keep this linear.
				468	if (Weights.size() > 128) {
				469	combineWeightsByHashing(Weights);
				470	return;
				471	}
				472
				473	combineWeightsBySorting(Weights);
				474	}
				475	static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
				476	assert(Shift >= 0);
				477	assert(Shift < 64);
				478	if (!Shift)
				479	return N;
				480	return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
				481	}
				482	void Distribution::normalize() {
				483	// Early exit for termination nodes.
				484	if (Weights.empty())
				485	return;
				486
				487	// Only bother if there are multiple successors.
				488	if (Weights.size() > 1)
				489	combineWeights(Weights);
				490
				491	// Early exit when combined into a single successor.
				492	if (Weights.size() == 1) {
				493	Total = 1;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	494	Weights.front().Amount = 1;
				495	return;
				496	}
				497
				498	// Determine how much to shift right so that the total fits into 32-bits.
				499	//
				500	// If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
				501	// for each weight can cause a 32-bit overflow.
				502	int Shift = 0;
				503	if (DidOverflow)
				504	Shift = 33;
				505	else if (Total > UINT32_MAX)
				506	Shift = 33 - countLeadingZeros(Total);
				507
				508	// Early exit if nothing needs to be scaled.
				509	if (!Shift)
				510	return;
				511
				512	// Recompute the total through accumulation (rather than shifting it) so that
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	513	// it's accurate after shifting.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	514	Total = 0;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	515
				516	// Sum the weights to each node and shift right if necessary.
				517	for (Weight &W : Weights) {
				518	// Scale down below UINT32_MAX. Since Shift is larger than necessary, we
				519	// can round here without concern about overflow.
				520	assert(W.TargetNode.isValid());
				521	W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
				522	assert(W.Amount <= UINT32_MAX);
				523
				524	// Update the total.
				525	Total += W.Amount;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	526	}
				527	assert(Total <= UINT32_MAX);
				528	}
				529
				530	void BlockFrequencyInfoImplBase::clear() {
Duncan P. N. Exon Smith	dc2d66e	2014-04-22 03:31:34 +0000	[diff] [blame]	531	// Swap with a default-constructed std::vector, since std::vector<>::clear()
				532	// does not actually clear heap storage.
				533	std::vector<FrequencyData>().swap(Freqs);
				534	std::vector<WorkingData>().swap(Working);
Duncan P. N. Exon Smith	fc7dc93	2014-04-25 04:30:06 +0000	[diff] [blame]	535	Loops.clear();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	536	}
				537
				538	/// \brief Clear all memory not needed downstream.
				539	///
				540	/// Releases all memory not used downstream. In particular, saves Freqs.
				541	static void cleanup(BlockFrequencyInfoImplBase &BFI) {
				542	std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
				543	BFI.clear();
				544	BFI.Freqs = std::move(SavedFreqs);
				545	}
				546
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	547	bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	548	const LoopData *OuterLoop,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	549	const BlockNode &Pred,
				550	const BlockNode &Succ,
				551	uint64_t Weight) {
				552	if (!Weight)
				553	Weight = 1;
				554
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	555	auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
				556	return OuterLoop && OuterLoop->isHeader(Node);
				557	};
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	558
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	559	BlockNode Resolved = Working[Succ.Index].getResolvedNode();
				560
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	561	#ifndef NDEBUG
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	562	auto debugSuccessor = [&](const char *Type) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	563	dbgs() << " =>"
				564	<< " [" << Type << "] weight = " << Weight;
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	565	if (!isLoopHeader(Resolved))
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	566	dbgs() << ", succ = " << getBlockName(Succ);
				567	if (Resolved != Succ)
				568	dbgs() << ", resolved = " << getBlockName(Resolved);
				569	dbgs() << "\n";
				570	};
				571	(void)debugSuccessor;
				572	#endif
				573
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	574	if (isLoopHeader(Resolved)) {
				575	DEBUG(debugSuccessor("backedge"));
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	576	Dist.addBackedge(OuterLoop->getHeader(), Weight);
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	577	return true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	578	}
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	579
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	580	if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	581	DEBUG(debugSuccessor(" exit "));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	582	Dist.addExit(Resolved, Weight);
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	583	return true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	584	}
				585
Duncan P. N. Exon Smith	b3380ea	2014-04-22 03:31:53 +0000	[diff] [blame]	586	if (Resolved < Pred) {
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	587	if (!isLoopHeader(Pred)) {
				588	// If OuterLoop is an irreducible loop, we can't actually handle this.
				589	assert((!OuterLoop \|\| !OuterLoop->isIrreducible()) &&
				590	"unhandled irreducible control flow");
				591
				592	// Irreducible backedge. Abort.
				593	DEBUG(debugSuccessor("abort!!!"));
				594	return false;
				595	}
				596
				597	// If "Pred" is a loop header, then this isn't really a backedge; rather,
				598	// OuterLoop must be irreducible. These false backedges can come only from
				599	// secondary loop headers.
				600	assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
				601	"unhandled irreducible control flow");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	602	}
				603
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	604	DEBUG(debugSuccessor(" local "));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	605	Dist.addLocal(Resolved, Weight);
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	606	return true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	607	}
				608
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	609	bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	610	const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	611	// Copy the exit map into Dist.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	612	for (const auto &I : Loop.Exits)
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	613	if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
				614	I.second.getMass()))
				615	// Irreducible backedge.
				616	return false;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	617
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	618	return true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	619	}
				620
				621	/// \brief Get the maximum allowed loop scale.
				622	///
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	623	/// Gives the maximum number of estimated iterations allowed for a loop. Very
				624	/// large numbers cause problems downstream (even within 64-bits).
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	625	static Float getMaxLoopScale() { return Float(1, 12); }
				626
				627	/// \brief Compute the loop scale for a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	628	void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	629	// Compute loop scale.
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	630	DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	631
				632	// LoopScale == 1 / ExitMass
				633	// ExitMass == HeadMass - BackedgeMass
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	634	BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	635
				636	// Block scale stores the inverse of the scale.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	637	Loop.Scale = ExitMass.toFloat().inverse();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	638
				639	DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	640	<< " - " << Loop.BackedgeMass << ")\n"
				641	<< " - scale = " << Loop.Scale << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	642
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	643	if (Loop.Scale > getMaxLoopScale()) {
				644	Loop.Scale = getMaxLoopScale();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	645	DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
				646	}
				647	}
				648
				649	/// \brief Package up a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	650	void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	651	DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
				652
				653	// Clear the subloop exits to prevent quadratic memory usage.
				654	for (const BlockNode &M : Loop.Nodes) {
				655	if (auto *Loop = Working[M.Index].getPackagedLoop())
				656	Loop->Exits.clear();
				657	DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
				658	}
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	659	Loop.IsPackaged = true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	660	}
				661
				662	void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	663	LoopData *OuterLoop,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	664	Distribution &Dist) {
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	665	BlockMass Mass = Working[Source.Index].getMass();
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	666	DEBUG(dbgs() << " => mass: " << Mass << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	667
				668	// Distribute mass to successors as laid out in Dist.
				669	DitheringDistributer D(Dist, Mass);
				670
				671	#ifndef NDEBUG
				672	auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
				673	const char *Desc) {
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	674	dbgs() << " => assign " << M << " (" << D.RemMass << ")";
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	675	if (Desc)
				676	dbgs() << " [" << Desc << "]";
				677	if (T.isValid())
				678	dbgs() << " to " << getBlockName(T);
				679	dbgs() << "\n";
				680	};
				681	(void)debugAssign;
				682	#endif
				683
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	684	for (const Weight &W : Dist.Weights) {
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	685	// Check for a local edge (non-backedge and non-exit).
				686	BlockMass Taken = D.takeMass(W.Amount);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	687	if (W.Type == Weight::Local) {
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	688	Working[W.TargetNode.Index].getMass() += Taken;
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	689	DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	690	continue;
				691	}
				692
				693	// Backedges and exits only make sense if we're processing a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	694	assert(OuterLoop && "backedge or exit outside of loop");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	695
				696	// Check for a backedge.
				697	if (W.Type == Weight::Backedge) {
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	698	OuterLoop->BackedgeMass += Taken;
				699	DEBUG(debugAssign(BlockNode(), Taken, "back"));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	700	continue;
				701	}
				702
				703	// This must be an exit.
				704	assert(W.Type == Weight::Exit);
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	705	OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
				706	DEBUG(debugAssign(W.TargetNode, Taken, "exit"));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	707	}
				708	}
				709
				710	static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
				711	const Float &Min, const Float &Max) {
				712	// Scale the Factor to a size that creates integers. Ideally, integers would
				713	// be scaled so that Max == UINT64_MAX so that they can be best
				714	// differentiated. However, the register allocator currently deals poorly
				715	// with large numbers. Instead, push Min up a little from 1 to give some
				716	// room to differentiate small, unequal numbers.
				717	//
				718	// TODO: fix issues downstream so that ScalingFactor can be Float(1,64)/Max.
				719	Float ScalingFactor = Min.inverse();
				720	if ((Max / Min).lg() < 60)
				721	ScalingFactor <<= 3;
				722
				723	// Translate the floats to integers.
				724	DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
				725	<< ", factor = " << ScalingFactor << "\n");
				726	for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
				727	Float Scaled = BFI.Freqs[Index].Floating * ScalingFactor;
				728	BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
				729	DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
				730	<< BFI.Freqs[Index].Floating << ", scaled = " << Scaled
				731	<< ", int = " << BFI.Freqs[Index].Integer << "\n");
				732	}
				733	}
				734
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	735	/// \brief Unwrap a loop package.
				736	///
				737	/// Visits all the members of a loop, adjusting their BlockData according to
				738	/// the loop's pseudo-node.
Duncan P. N. Exon Smith	0633f0e	2014-04-25 04:38:25 +0000	[diff] [blame]	739	static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	740	DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
Duncan P. N. Exon Smith	0633f0e	2014-04-25 04:38:25 +0000	[diff] [blame]	741	<< ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
				742	<< "\n");
Duncan P. N. Exon Smith	5291d2a	2014-04-25 04:38:27 +0000	[diff] [blame]	743	Loop.Scale *= Loop.Mass.toFloat();
				744	Loop.IsPackaged = false;
Duncan P. N. Exon Smith	3f08678	2014-04-25 04:38:32 +0000	[diff] [blame]	745	DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	746
				747	// Propagate the head scale through the loop. Since members are visited in
				748	// RPO, the head scale will be updated by the loop scale first, and then the
				749	// final head scale will be used for updated the rest of the members.
Duncan P. N. Exon Smith	5291d2a	2014-04-25 04:38:27 +0000	[diff] [blame]	750	for (const BlockNode &N : Loop.Nodes) {
				751	const auto &Working = BFI.Working[N.Index];
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	752	Float &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
Duncan P. N. Exon Smith	5291d2a	2014-04-25 04:38:27 +0000	[diff] [blame]	753	: BFI.Freqs[N.Index].Floating;
				754	Float New = Loop.Scale * F;
				755	DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
				756	<< "\n");
				757	F = New;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	758	}
				759	}
				760
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	761	void BlockFrequencyInfoImplBase::unwrapLoops() {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	762	// Set initial frequencies from loop-local masses.
				763	for (size_t Index = 0; Index < Working.size(); ++Index)
				764	Freqs[Index].Floating = Working[Index].Mass.toFloat();
				765
Duncan P. N. Exon Smith	da0b21c	2014-04-25 04:38:23 +0000	[diff] [blame]	766	for (LoopData &Loop : Loops)
Duncan P. N. Exon Smith	0633f0e	2014-04-25 04:38:25 +0000	[diff] [blame]	767	unwrapLoop(*this, Loop);
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	768	}
				769
				770	void BlockFrequencyInfoImplBase::finalizeMetrics() {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	771	// Unwrap loop packages in reverse post-order, tracking min and max
				772	// frequencies.
				773	auto Min = Float::getLargest();
				774	auto Max = Float::getZero();
				775	for (size_t Index = 0; Index < Working.size(); ++Index) {
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	776	// Update min/max scale.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	777	Min = std::min(Min, Freqs[Index].Floating);
				778	Max = std::max(Max, Freqs[Index].Floating);
				779	}
				780
				781	// Convert to integers.
				782	convertFloatingToInteger(*this, Min, Max);
				783
				784	// Clean up data structures.
				785	cleanup(*this);
				786
				787	// Print out the final stats.
				788	DEBUG(dump());
				789	}
				790
				791	BlockFrequency
				792	BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
				793	if (!Node.isValid())
				794	return 0;
				795	return Freqs[Node.Index].Integer;
				796	}
				797	Float
				798	BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
				799	if (!Node.isValid())
				800	return Float::getZero();
				801	return Freqs[Node.Index].Floating;
				802	}
				803
				804	std::string
				805	BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
				806	return std::string();
				807	}
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	808	std::string
				809	BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
				810	return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "*" : "");
				811	}
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	812
				813	raw_ostream &
				814	BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
				815	const BlockNode &Node) const {
				816	return OS << getFloatingBlockFreq(Node);
				817	}
				818
				819	raw_ostream &
				820	BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
				821	const BlockFrequency &Freq) const {
				822	Float Block(Freq.getFrequency(), 0);
				823	Float Entry(getEntryFreq(), 0);
				824
				825	return OS << Block / Entry;
				826	}
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	827
				828	void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
				829	Start = OuterLoop.getHeader();
				830	Nodes.reserve(OuterLoop.Nodes.size());
				831	for (auto N : OuterLoop.Nodes)
				832	addNode(N);
				833	indexNodes();
				834	}
				835	void IrreducibleGraph::addNodesInFunction() {
				836	Start = 0;
				837	for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
				838	if (!BFI.Working[Index].isPackaged())
				839	addNode(Index);
				840	indexNodes();
				841	}
				842	void IrreducibleGraph::indexNodes() {
				843	for (auto &I : Nodes)
				844	Lookup[I.Node.Index] = &I;
				845	}
				846	void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
				847	const BFIBase::LoopData *OuterLoop) {
				848	if (OuterLoop && OuterLoop->isHeader(Succ))
				849	return;
				850	auto L = Lookup.find(Succ.Index);
				851	if (L == Lookup.end())
				852	return;
				853	IrrNode &SuccIrr = *L->second;
				854	Irr.Edges.push_back(&SuccIrr);
				855	SuccIrr.Edges.push_front(&Irr);
				856	++SuccIrr.NumIn;
				857	}
				858
				859	namespace llvm {
				860	template <> struct GraphTraits<IrreducibleGraph> {
				861	typedef bfi_detail::IrreducibleGraph GraphT;
				862
Duncan P. N. Exon Smith	295b5e7	2014-04-28 20:22:29 +0000	[diff] [blame]	863	typedef const GraphT::IrrNode NodeType;
				864	typedef GraphT::IrrNode::iterator ChildIteratorType;
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	865
				866	static const NodeType *getEntryNode(const GraphT &G) {
				867	return G.StartIrr;
				868	}
				869	static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
				870	static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
				871	};
				872	}
				873
				874	/// \brief Find extra irreducible headers.
				875	///
				876	/// Find entry blocks and other blocks with backedges, which exist when \c G
				877	/// contains irreducible sub-SCCs.
				878	static void findIrreducibleHeaders(
				879	const BlockFrequencyInfoImplBase &BFI,
				880	const IrreducibleGraph &G,
				881	const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
				882	LoopData::NodeList &Headers, LoopData::NodeList &Others) {
				883	// Map from nodes in the SCC to whether it's an entry block.
				884	SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
				885
				886	// InSCC also acts the set of nodes in the graph. Seed it.
				887	for (const auto *I : SCC)
				888	InSCC[I] = false;
				889
				890	for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
				891	auto &Irr = *I->first;
				892	for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
				893	if (InSCC.count(P))
				894	continue;
				895
				896	// This is an entry block.
				897	I->second = true;
				898	Headers.push_back(Irr.Node);
				899	DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n");
				900	break;
				901	}
				902	}
				903	assert(Headers.size() >= 2 && "Should be irreducible");
				904	if (Headers.size() == InSCC.size()) {
				905	// Every block is a header.
				906	std::sort(Headers.begin(), Headers.end());
				907	return;
				908	}
				909
				910	// Look for extra headers from irreducible sub-SCCs.
				911	for (const auto &I : InSCC) {
				912	// Entry blocks are already headers.
				913	if (I.second)
				914	continue;
				915
				916	auto &Irr = *I.first;
				917	for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
				918	// Skip forward edges.
				919	if (P->Node < Irr.Node)
				920	continue;
				921
				922	// Skip predecessors from entry blocks. These can have inverted
				923	// ordering.
				924	if (InSCC.lookup(P))
				925	continue;
				926
				927	// Store the extra header.
				928	Headers.push_back(Irr.Node);
				929	DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n");
				930	break;
				931	}
				932	if (Headers.back() == Irr.Node)
				933	// Added this as a header.
				934	continue;
				935
				936	// This is not a header.
				937	Others.push_back(Irr.Node);
				938	DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
				939	}
				940	std::sort(Headers.begin(), Headers.end());
				941	std::sort(Others.begin(), Others.end());
				942	}
				943
				944	static void createIrreducibleLoop(
				945	BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
				946	LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
				947	const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
				948	// Translate the SCC into RPO.
				949	DEBUG(dbgs() << " - found-scc\n");
				950
				951	LoopData::NodeList Headers;
				952	LoopData::NodeList Others;
				953	findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
				954
				955	auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
				956	Headers.end(), Others.begin(), Others.end());
				957
				958	// Update loop hierarchy.
				959	for (const auto &N : Loop->Nodes)
				960	if (BFI.Working[N.Index].isLoopHeader())
				961	BFI.Working[N.Index].Loop->Parent = &*Loop;
				962	else
				963	BFI.Working[N.Index].Loop = &*Loop;
				964	}
				965
				966	iterator_range<std::list<LoopData>::iterator>
				967	BlockFrequencyInfoImplBase::analyzeIrreducible(
				968	const IrreducibleGraph &G, LoopData *OuterLoop,
				969	std::list<LoopData>::iterator Insert) {
				970	assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
				971	auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
				972
				973	for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
				974	if (I->size() < 2)
				975	continue;
				976
				977	// Translate the SCC into RPO.
				978	createIrreducibleLoop(this, G, OuterLoop, Insert, I);
				979	}
				980
				981	if (OuterLoop)
				982	return make_range(std::next(Prev), Insert);
				983	return make_range(Loops.begin(), Insert);
				984	}
				985
				986	void
				987	BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
				988	OuterLoop.Exits.clear();
				989	OuterLoop.BackedgeMass = BlockMass::getEmpty();
				990	auto O = OuterLoop.Nodes.begin() + 1;
				991	for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
				992	if (!Working[I->Index].isPackaged())
				993	O++ = I;
				994	OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
				995	}