Blame - llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp - toolchain/llvm-project

blob: 4a61e3446db8d5c650f96e37949e58fe5f286733 [file] [log] [blame]

Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	1	//===- BlockFrequencyInfoImpl.cpp - Block Frequency Info Implementation ---===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// Loops should be simplified before this analysis.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	14	#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
				15	#include "llvm/ADT/APFloat.h"
				16	#include "llvm/Support/raw_ostream.h"
				17	#include <deque>
				18
				19	using namespace llvm;
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	20	using namespace llvm::bfi_detail;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	21
Chandler Carruth	1b9dde0	2014-04-22 02:02:50 +0000	[diff] [blame]	22	#define DEBUG_TYPE "block-freq"
				23
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	24	//===----------------------------------------------------------------------===//
				25	//
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	26	// UnsignedFloat implementation.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	27	//
				28	//===----------------------------------------------------------------------===//
				29	#ifndef _MSC_VER
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	30	const int32_t UnsignedFloatBase::MaxExponent;
				31	const int32_t UnsignedFloatBase::MinExponent;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	32	#endif
				33
				34	static void appendDigit(std::string &Str, unsigned D) {
				35	assert(D < 10);
				36	Str += '0' + D % 10;
				37	}
				38
				39	static void appendNumber(std::string &Str, uint64_t N) {
				40	while (N) {
				41	appendDigit(Str, N % 10);
				42	N /= 10;
				43	}
				44	}
				45
				46	static bool doesRoundUp(char Digit) {
				47	switch (Digit) {
				48	case '5':
				49	case '6':
				50	case '7':
				51	case '8':
				52	case '9':
				53	return true;
				54	default:
				55	return false;
				56	}
				57	}
				58
				59	static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) {
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	60	assert(E >= UnsignedFloatBase::MinExponent);
				61	assert(E <= UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	62
				63	// Find a new E, but don't let it increase past MaxExponent.
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	64	int LeadingZeros = UnsignedFloatBase::countLeadingZeros64(D);
				65	int NewE = std::min(UnsignedFloatBase::MaxExponent, E + 63 - LeadingZeros);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	66	int Shift = 63 - (NewE - E);
				67	assert(Shift <= LeadingZeros);
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	68	assert(Shift == LeadingZeros \|\| NewE == UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	69	D <<= Shift;
				70	E = NewE;
				71
				72	// Check for a denormal.
				73	unsigned AdjustedE = E + 16383;
				74	if (!(D >> 63)) {
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	75	assert(E == UnsignedFloatBase::MaxExponent);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	76	AdjustedE = 0;
				77	}
				78
				79	// Build the float and print it.
				80	uint64_t RawBits[2] = {D, AdjustedE};
				81	APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits));
				82	SmallVector<char, 24> Chars;
				83	Float.toString(Chars, Precision, 0);
				84	return std::string(Chars.begin(), Chars.end());
				85	}
				86
				87	static std::string stripTrailingZeros(const std::string &Float) {
				88	size_t NonZero = Float.find_last_not_of('0');
				89	assert(NonZero != std::string::npos && "no . in floating point string");
				90
				91	if (Float[NonZero] == '.')
				92	++NonZero;
				93
				94	return Float.substr(0, NonZero + 1);
				95	}
				96
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	97	std::string UnsignedFloatBase::toString(uint64_t D, int16_t E, int Width,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	98	unsigned Precision) {
				99	if (!D)
				100	return "0.0";
				101
				102	// Canonicalize exponent and digits.
				103	uint64_t Above0 = 0;
				104	uint64_t Below0 = 0;
				105	uint64_t Extra = 0;
				106	int ExtraShift = 0;
				107	if (E == 0) {
				108	Above0 = D;
				109	} else if (E > 0) {
				110	if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) {
				111	D <<= Shift;
				112	E -= Shift;
				113
				114	if (!E)
				115	Above0 = D;
				116	}
				117	} else if (E > -64) {
				118	Above0 = D >> -E;
				119	Below0 = D << (64 + E);
				120	} else if (E > -120) {
				121	Below0 = D >> (-E - 64);
				122	Extra = D << (128 + E);
				123	ExtraShift = -64 - E;
				124	}
				125
				126	// Fall back on APFloat for very small and very large numbers.
				127	if (!Above0 && !Below0)
				128	return toStringAPFloat(D, E, Precision);
				129
				130	// Append the digits before the decimal.
				131	std::string Str;
				132	size_t DigitsOut = 0;
				133	if (Above0) {
				134	appendNumber(Str, Above0);
				135	DigitsOut = Str.size();
				136	} else
				137	appendDigit(Str, 0);
				138	std::reverse(Str.begin(), Str.end());
				139
				140	// Return early if there's nothing after the decimal.
				141	if (!Below0)
				142	return Str + ".0";
				143
				144	// Append the decimal and beyond.
				145	Str += '.';
				146	uint64_t Error = UINT64_C(1) << (64 - Width);
				147
				148	// We need to shift Below0 to the right to make space for calculating
				149	// digits. Save the precision we're losing in Extra.
				150	Extra = (Below0 & 0xf) << 56 \| (Extra >> 8);
				151	Below0 >>= 4;
				152	size_t SinceDot = 0;
				153	size_t AfterDot = Str.size();
				154	do {
				155	if (ExtraShift) {
				156	--ExtraShift;
				157	Error *= 5;
				158	} else
				159	Error *= 10;
				160
				161	Below0 *= 10;
				162	Extra *= 10;
				163	Below0 += (Extra >> 60);
				164	Extra = Extra & (UINT64_MAX >> 4);
				165	appendDigit(Str, Below0 >> 60);
				166	Below0 = Below0 & (UINT64_MAX >> 4);
				167	if (DigitsOut \|\| Str.back() != '0')
				168	++DigitsOut;
				169	++SinceDot;
				170	} while (Error && (Below0 << 4 \| Extra >> 60) >= Error / 2 &&
				171	(!Precision \|\| DigitsOut <= Precision \|\| SinceDot < 2));
				172
				173	// Return early for maximum precision.
				174	if (!Precision \|\| DigitsOut <= Precision)
				175	return stripTrailingZeros(Str);
				176
				177	// Find where to truncate.
				178	size_t Truncate =
				179	std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1);
				180
				181	// Check if there's anything to truncate.
				182	if (Truncate >= Str.size())
				183	return stripTrailingZeros(Str);
				184
				185	bool Carry = doesRoundUp(Str[Truncate]);
				186	if (!Carry)
				187	return stripTrailingZeros(Str.substr(0, Truncate));
				188
				189	// Round with the first truncated digit.
				190	for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend();
				191	I != E; ++I) {
				192	if (*I == '.')
				193	continue;
				194	if (*I == '9') {
				195	*I = '0';
				196	continue;
				197	}
				198
				199	++*I;
				200	Carry = false;
				201	break;
				202	}
				203
				204	// Add "1" in front if we still need to carry.
				205	return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate));
				206	}
				207
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	208	raw_ostream &UnsignedFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	209	int Width, unsigned Precision) {
				210	return OS << toString(D, E, Width, Precision);
				211	}
				212
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	213	void UnsignedFloatBase::dump(uint64_t D, int16_t E, int Width) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	214	print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E
				215	<< "]";
				216	}
				217
				218	static std::pair<uint64_t, int16_t>
				219	getRoundedFloat(uint64_t N, bool ShouldRound, int64_t Shift) {
				220	if (ShouldRound)
				221	if (!++N)
				222	// Rounding caused an overflow.
				223	return std::make_pair(UINT64_C(1), Shift + 64);
				224	return std::make_pair(N, Shift);
				225	}
				226
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	227	std::pair<uint64_t, int16_t> UnsignedFloatBase::divide64(uint64_t Dividend,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	228	uint64_t Divisor) {
				229	// Input should be sanitized.
				230	assert(Divisor);
				231	assert(Dividend);
				232
				233	// Minimize size of divisor.
				234	int16_t Shift = 0;
				235	if (int Zeros = countTrailingZeros(Divisor)) {
				236	Shift -= Zeros;
				237	Divisor >>= Zeros;
				238	}
				239
				240	// Check for powers of two.
				241	if (Divisor == 1)
				242	return std::make_pair(Dividend, Shift);
				243
				244	// Maximize size of dividend.
				245	if (int Zeros = countLeadingZeros64(Dividend)) {
				246	Shift -= Zeros;
				247	Dividend <<= Zeros;
				248	}
				249
				250	// Start with the result of a divide.
				251	uint64_t Quotient = Dividend / Divisor;
				252	Dividend %= Divisor;
				253
				254	// Continue building the quotient with long division.
				255	//
				256	// TODO: continue with largers digits.
				257	while (!(Quotient >> 63) && Dividend) {
				258	// Shift Dividend, and check for overflow.
				259	bool IsOverflow = Dividend >> 63;
				260	Dividend <<= 1;
				261	--Shift;
				262
				263	// Divide.
				264	bool DoesDivide = IsOverflow \|\| Divisor <= Dividend;
				265	Quotient = (Quotient << 1) \| uint64_t(DoesDivide);
				266	Dividend -= DoesDivide ? Divisor : 0;
				267	}
				268
				269	// Round.
				270	if (Dividend >= getHalf(Divisor))
				271	if (!++Quotient)
				272	// Rounding caused an overflow in Quotient.
				273	return std::make_pair(UINT64_C(1), Shift + 64);
				274
				275	return getRoundedFloat(Quotient, Dividend >= getHalf(Divisor), Shift);
				276	}
				277
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	278	std::pair<uint64_t, int16_t> UnsignedFloatBase::multiply64(uint64_t L,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	279	uint64_t R) {
				280	// Separate into two 32-bit digits (U.L).
				281	uint64_t UL = L >> 32, LL = L & UINT32_MAX, UR = R >> 32, LR = R & UINT32_MAX;
				282
				283	// Compute cross products.
				284	uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR;
				285
				286	// Sum into two 64-bit digits.
				287	uint64_t Upper = P1, Lower = P4;
				288	auto addWithCarry = [&](uint64_t N) {
				289	uint64_t NewLower = Lower + (N << 32);
				290	Upper += (N >> 32) + (NewLower < Lower);
				291	Lower = NewLower;
				292	};
				293	addWithCarry(P2);
				294	addWithCarry(P3);
				295
				296	// Check whether the upper digit is empty.
				297	if (!Upper)
				298	return std::make_pair(Lower, 0);
				299
				300	// Shift as little as possible to maximize precision.
				301	unsigned LeadingZeros = countLeadingZeros64(Upper);
				302	int16_t Shift = 64 - LeadingZeros;
				303	if (LeadingZeros)
				304	Upper = Upper << LeadingZeros \| Lower >> Shift;
				305	bool ShouldRound = Shift && (Lower & UINT64_C(1) << (Shift - 1));
				306	return getRoundedFloat(Upper, ShouldRound, Shift);
				307	}
				308
				309	//===----------------------------------------------------------------------===//
				310	//
				311	// BlockMass implementation.
				312	//
				313	//===----------------------------------------------------------------------===//
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	314	UnsignedFloat<uint64_t> BlockMass::toFloat() const {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	315	if (isFull())
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	316	return UnsignedFloat<uint64_t>(1, 0);
				317	return UnsignedFloat<uint64_t>(getMass() + 1, -64);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	318	}
				319
				320	void BlockMass::dump() const { print(dbgs()); }
				321
				322	static char getHexDigit(int N) {
				323	assert(N < 16);
				324	if (N < 10)
				325	return '0' + N;
				326	return 'a' + N - 10;
				327	}
				328	raw_ostream &BlockMass::print(raw_ostream &OS) const {
				329	for (int Digits = 0; Digits < 16; ++Digits)
				330	OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
				331	return OS;
				332	}
				333
				334	//===----------------------------------------------------------------------===//
				335	//
				336	// BlockFrequencyInfoImpl implementation.
				337	//
				338	//===----------------------------------------------------------------------===//
				339	namespace {
				340
				341	typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
				342	typedef BlockFrequencyInfoImplBase::Distribution Distribution;
				343	typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
				344	typedef BlockFrequencyInfoImplBase::Float Float;
Duncan P. N. Exon Smith	cc88ebf	2014-04-22 03:31:31 +0000	[diff] [blame]	345	typedef BlockFrequencyInfoImplBase::LoopData LoopData;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	346	typedef BlockFrequencyInfoImplBase::Weight Weight;
				347	typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
				348
				349	/// \brief Dithering mass distributer.
				350	///
				351	/// This class splits up a single mass into portions by weight, dithering to
				352	/// spread out error. No mass is lost. The dithering precision depends on the
				353	/// precision of the product of \a BlockMass and \a BranchProbability.
				354	///
				355	/// The distribution algorithm follows.
				356	///
				357	/// 1. Initialize by saving the sum of the weights in \a RemWeight and the
				358	/// mass to distribute in \a RemMass.
				359	///
				360	/// 2. For each portion:
				361	///
				362	/// 1. Construct a branch probability, P, as the portion's weight divided
				363	/// by the current value of \a RemWeight.
				364	/// 2. Calculate the portion's mass as \a RemMass times P.
				365	/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
				366	/// the current portion's weight and mass.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	367	struct DitheringDistributer {
				368	uint32_t RemWeight;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	369	BlockMass RemMass;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	370
				371	DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
				372
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	373	BlockMass takeMass(uint32_t Weight);
				374	};
				375	}
				376
				377	DitheringDistributer::DitheringDistributer(Distribution &Dist,
				378	const BlockMass &Mass) {
				379	Dist.normalize();
				380	RemWeight = Dist.Total;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	381	RemMass = Mass;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	382	}
				383
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	384	BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
				385	assert(Weight && "invalid weight");
				386	assert(Weight <= RemWeight);
				387	BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
				388
				389	// Decrement totals (dither).
				390	RemWeight -= Weight;
				391	RemMass -= Mass;
				392	return Mass;
				393	}
				394
				395	void Distribution::add(const BlockNode &Node, uint64_t Amount,
				396	Weight::DistType Type) {
				397	assert(Amount && "invalid weight of 0");
				398	uint64_t NewTotal = Total + Amount;
				399
				400	// Check for overflow. It should be impossible to overflow twice.
				401	bool IsOverflow = NewTotal < Total;
				402	assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
				403	DidOverflow \|= IsOverflow;
				404
				405	// Update the total.
				406	Total = NewTotal;
				407
				408	// Save the weight.
				409	Weight W;
				410	W.TargetNode = Node;
				411	W.Amount = Amount;
				412	W.Type = Type;
				413	Weights.push_back(W);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	414	}
				415
				416	static void combineWeight(Weight &W, const Weight &OtherW) {
				417	assert(OtherW.TargetNode.isValid());
				418	if (!W.Amount) {
				419	W = OtherW;
				420	return;
				421	}
				422	assert(W.Type == OtherW.Type);
				423	assert(W.TargetNode == OtherW.TargetNode);
Duncan P. N. Exon Smith	ebf7626	2014-04-25 04:38:40 +0000	[diff] [blame]	424	assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	425	W.Amount += OtherW.Amount;
				426	}
				427	static void combineWeightsBySorting(WeightList &Weights) {
				428	// Sort so edges to the same node are adjacent.
				429	std::sort(Weights.begin(), Weights.end(),
				430	[](const Weight &L,
				431	const Weight &R) { return L.TargetNode < R.TargetNode; });
				432
				433	// Combine adjacent edges.
				434	WeightList::iterator O = Weights.begin();
				435	for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
				436	++O, (I = L)) {
				437	O = I;
				438
				439	// Find the adjacent weights to the same node.
				440	for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
				441	combineWeight(O, L);
				442	}
				443
				444	// Erase extra entries.
				445	Weights.erase(O, Weights.end());
				446	return;
				447	}
				448	static void combineWeightsByHashing(WeightList &Weights) {
				449	// Collect weights into a DenseMap.
				450	typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
				451	HashTable Combined(NextPowerOf2(2 * Weights.size()));
				452	for (const Weight &W : Weights)
				453	combineWeight(Combined[W.TargetNode.Index], W);
				454
				455	// Check whether anything changed.
				456	if (Weights.size() == Combined.size())
				457	return;
				458
				459	// Fill in the new weights.
				460	Weights.clear();
				461	Weights.reserve(Combined.size());
				462	for (const auto &I : Combined)
				463	Weights.push_back(I.second);
				464	}
				465	static void combineWeights(WeightList &Weights) {
				466	// Use a hash table for many successors to keep this linear.
				467	if (Weights.size() > 128) {
				468	combineWeightsByHashing(Weights);
				469	return;
				470	}
				471
				472	combineWeightsBySorting(Weights);
				473	}
				474	static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
				475	assert(Shift >= 0);
				476	assert(Shift < 64);
				477	if (!Shift)
				478	return N;
				479	return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
				480	}
				481	void Distribution::normalize() {
				482	// Early exit for termination nodes.
				483	if (Weights.empty())
				484	return;
				485
				486	// Only bother if there are multiple successors.
				487	if (Weights.size() > 1)
				488	combineWeights(Weights);
				489
				490	// Early exit when combined into a single successor.
				491	if (Weights.size() == 1) {
				492	Total = 1;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	493	Weights.front().Amount = 1;
				494	return;
				495	}
				496
				497	// Determine how much to shift right so that the total fits into 32-bits.
				498	//
				499	// If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
				500	// for each weight can cause a 32-bit overflow.
				501	int Shift = 0;
				502	if (DidOverflow)
				503	Shift = 33;
				504	else if (Total > UINT32_MAX)
				505	Shift = 33 - countLeadingZeros(Total);
				506
				507	// Early exit if nothing needs to be scaled.
				508	if (!Shift)
				509	return;
				510
				511	// Recompute the total through accumulation (rather than shifting it) so that
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	512	// it's accurate after shifting.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	513	Total = 0;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	514
				515	// Sum the weights to each node and shift right if necessary.
				516	for (Weight &W : Weights) {
				517	// Scale down below UINT32_MAX. Since Shift is larger than necessary, we
				518	// can round here without concern about overflow.
				519	assert(W.TargetNode.isValid());
				520	W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
				521	assert(W.Amount <= UINT32_MAX);
				522
				523	// Update the total.
				524	Total += W.Amount;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	525	}
				526	assert(Total <= UINT32_MAX);
				527	}
				528
				529	void BlockFrequencyInfoImplBase::clear() {
Duncan P. N. Exon Smith	dc2d66e	2014-04-22 03:31:34 +0000	[diff] [blame]	530	// Swap with a default-constructed std::vector, since std::vector<>::clear()
				531	// does not actually clear heap storage.
				532	std::vector<FrequencyData>().swap(Freqs);
				533	std::vector<WorkingData>().swap(Working);
Duncan P. N. Exon Smith	fc7dc93	2014-04-25 04:30:06 +0000	[diff] [blame]	534	Loops.clear();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	535	}
				536
				537	/// \brief Clear all memory not needed downstream.
				538	///
				539	/// Releases all memory not used downstream. In particular, saves Freqs.
				540	static void cleanup(BlockFrequencyInfoImplBase &BFI) {
				541	std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
				542	BFI.clear();
				543	BFI.Freqs = std::move(SavedFreqs);
				544	}
				545
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	546	bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	547	const LoopData *OuterLoop,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	548	const BlockNode &Pred,
				549	const BlockNode &Succ,
				550	uint64_t Weight) {
				551	if (!Weight)
				552	Weight = 1;
				553
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	554	auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
				555	return OuterLoop && OuterLoop->isHeader(Node);
				556	};
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	557
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	558	BlockNode Resolved = Working[Succ.Index].getResolvedNode();
				559
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	560	#ifndef NDEBUG
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	561	auto debugSuccessor = [&](const char *Type) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	562	dbgs() << " =>"
				563	<< " [" << Type << "] weight = " << Weight;
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	564	if (!isLoopHeader(Resolved))
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	565	dbgs() << ", succ = " << getBlockName(Succ);
				566	if (Resolved != Succ)
				567	dbgs() << ", resolved = " << getBlockName(Resolved);
				568	dbgs() << "\n";
				569	};
				570	(void)debugSuccessor;
				571	#endif
				572
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	573	if (isLoopHeader(Resolved)) {
				574	DEBUG(debugSuccessor("backedge"));
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	575	Dist.addBackedge(OuterLoop->getHeader(), Weight);
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	576	return true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	577	}
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	578
Duncan P. N. Exon Smith	39cc648	2014-04-25 04:38:06 +0000	[diff] [blame]	579	if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	580	DEBUG(debugSuccessor(" exit "));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	581	Dist.addExit(Resolved, Weight);
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	582	return true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	583	}
				584
Duncan P. N. Exon Smith	b3380ea	2014-04-22 03:31:53 +0000	[diff] [blame]	585	if (Resolved < Pred) {
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	586	if (!isLoopHeader(Pred)) {
				587	// If OuterLoop is an irreducible loop, we can't actually handle this.
				588	assert((!OuterLoop \|\| !OuterLoop->isIrreducible()) &&
				589	"unhandled irreducible control flow");
				590
				591	// Irreducible backedge. Abort.
				592	DEBUG(debugSuccessor("abort!!!"));
				593	return false;
				594	}
				595
				596	// If "Pred" is a loop header, then this isn't really a backedge; rather,
				597	// OuterLoop must be irreducible. These false backedges can come only from
				598	// secondary loop headers.
				599	assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
				600	"unhandled irreducible control flow");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	601	}
				602
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	603	DEBUG(debugSuccessor(" local "));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	604	Dist.addLocal(Resolved, Weight);
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	605	return true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	606	}
				607
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	608	bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	609	const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	610	// Copy the exit map into Dist.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	611	for (const auto &I : Loop.Exits)
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	612	if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
				613	I.second.getMass()))
				614	// Irreducible backedge.
				615	return false;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	616
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	617	return true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	618	}
				619
				620	/// \brief Get the maximum allowed loop scale.
				621	///
Duncan P. N. Exon Smith	254689f	2014-04-21 18:31:58 +0000	[diff] [blame]	622	/// Gives the maximum number of estimated iterations allowed for a loop. Very
				623	/// large numbers cause problems downstream (even within 64-bits).
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	624	static Float getMaxLoopScale() { return Float(1, 12); }
				625
				626	/// \brief Compute the loop scale for a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	627	void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	628	// Compute loop scale.
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	629	DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	630
				631	// LoopScale == 1 / ExitMass
				632	// ExitMass == HeadMass - BackedgeMass
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	633	BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	634
				635	// Block scale stores the inverse of the scale.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	636	Loop.Scale = ExitMass.toFloat().inverse();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	637
				638	DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	639	<< " - " << Loop.BackedgeMass << ")\n"
				640	<< " - scale = " << Loop.Scale << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	641
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	642	if (Loop.Scale > getMaxLoopScale()) {
				643	Loop.Scale = getMaxLoopScale();
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	644	DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
				645	}
				646	}
				647
				648	/// \brief Package up a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	649	void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	650	DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
				651
				652	// Clear the subloop exits to prevent quadratic memory usage.
				653	for (const BlockNode &M : Loop.Nodes) {
				654	if (auto *Loop = Working[M.Index].getPackagedLoop())
				655	Loop->Exits.clear();
				656	DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
				657	}
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	658	Loop.IsPackaged = true;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	659	}
				660
				661	void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	662	LoopData *OuterLoop,
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	663	Distribution &Dist) {
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	664	BlockMass Mass = Working[Source.Index].getMass();
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	665	DEBUG(dbgs() << " => mass: " << Mass << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	666
				667	// Distribute mass to successors as laid out in Dist.
				668	DitheringDistributer D(Dist, Mass);
				669
				670	#ifndef NDEBUG
				671	auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
				672	const char *Desc) {
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	673	dbgs() << " => assign " << M << " (" << D.RemMass << ")";
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	674	if (Desc)
				675	dbgs() << " [" << Desc << "]";
				676	if (T.isValid())
				677	dbgs() << " to " << getBlockName(T);
				678	dbgs() << "\n";
				679	};
				680	(void)debugAssign;
				681	#endif
				682
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	683	for (const Weight &W : Dist.Weights) {
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	684	// Check for a local edge (non-backedge and non-exit).
				685	BlockMass Taken = D.takeMass(W.Amount);
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	686	if (W.Type == Weight::Local) {
Duncan P. N. Exon Smith	da5eaed	2014-04-25 18:47:04 +0000	[diff] [blame]	687	Working[W.TargetNode.Index].getMass() += Taken;
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	688	DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	689	continue;
				690	}
				691
				692	// Backedges and exits only make sense if we're processing a loop.
Duncan P. N. Exon Smith	d132040	2014-04-25 04:38:01 +0000	[diff] [blame]	693	assert(OuterLoop && "backedge or exit outside of loop");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	694
				695	// Check for a backedge.
				696	if (W.Type == Weight::Backedge) {
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	697	OuterLoop->BackedgeMass += Taken;
				698	DEBUG(debugAssign(BlockNode(), Taken, "back"));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	699	continue;
				700	}
				701
				702	// This must be an exit.
				703	assert(W.Type == Weight::Exit);
Duncan P. N. Exon Smith	cb7d29d	2014-04-25 04:38:43 +0000	[diff] [blame]	704	OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
				705	DEBUG(debugAssign(W.TargetNode, Taken, "exit"));
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	706	}
				707	}
				708
				709	static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
				710	const Float &Min, const Float &Max) {
				711	// Scale the Factor to a size that creates integers. Ideally, integers would
				712	// be scaled so that Max == UINT64_MAX so that they can be best
				713	// differentiated. However, the register allocator currently deals poorly
				714	// with large numbers. Instead, push Min up a little from 1 to give some
				715	// room to differentiate small, unequal numbers.
				716	//
				717	// TODO: fix issues downstream so that ScalingFactor can be Float(1,64)/Max.
				718	Float ScalingFactor = Min.inverse();
				719	if ((Max / Min).lg() < 60)
				720	ScalingFactor <<= 3;
				721
				722	// Translate the floats to integers.
				723	DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
				724	<< ", factor = " << ScalingFactor << "\n");
				725	for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
				726	Float Scaled = BFI.Freqs[Index].Floating * ScalingFactor;
				727	BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
				728	DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
				729	<< BFI.Freqs[Index].Floating << ", scaled = " << Scaled
				730	<< ", int = " << BFI.Freqs[Index].Integer << "\n");
				731	}
				732	}
				733
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	734	/// \brief Unwrap a loop package.
				735	///
				736	/// Visits all the members of a loop, adjusting their BlockData according to
				737	/// the loop's pseudo-node.
Duncan P. N. Exon Smith	0633f0e	2014-04-25 04:38:25 +0000	[diff] [blame]	738	static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	739	DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
Duncan P. N. Exon Smith	0633f0e	2014-04-25 04:38:25 +0000	[diff] [blame]	740	<< ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
				741	<< "\n");
Duncan P. N. Exon Smith	5291d2a	2014-04-25 04:38:27 +0000	[diff] [blame]	742	Loop.Scale *= Loop.Mass.toFloat();
				743	Loop.IsPackaged = false;
Duncan P. N. Exon Smith	3f08678	2014-04-25 04:38:32 +0000	[diff] [blame]	744	DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	745
				746	// Propagate the head scale through the loop. Since members are visited in
				747	// RPO, the head scale will be updated by the loop scale first, and then the
				748	// final head scale will be used for updated the rest of the members.
Duncan P. N. Exon Smith	5291d2a	2014-04-25 04:38:27 +0000	[diff] [blame]	749	for (const BlockNode &N : Loop.Nodes) {
				750	const auto &Working = BFI.Working[N.Index];
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	751	Float &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
Duncan P. N. Exon Smith	5291d2a	2014-04-25 04:38:27 +0000	[diff] [blame]	752	: BFI.Freqs[N.Index].Floating;
				753	Float New = Loop.Scale * F;
				754	DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
				755	<< "\n");
				756	F = New;
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	757	}
				758	}
				759
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	760	void BlockFrequencyInfoImplBase::unwrapLoops() {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	761	// Set initial frequencies from loop-local masses.
				762	for (size_t Index = 0; Index < Working.size(); ++Index)
				763	Freqs[Index].Floating = Working[Index].Mass.toFloat();
				764
Duncan P. N. Exon Smith	da0b21c	2014-04-25 04:38:23 +0000	[diff] [blame]	765	for (LoopData &Loop : Loops)
Duncan P. N. Exon Smith	0633f0e	2014-04-25 04:38:25 +0000	[diff] [blame]	766	unwrapLoop(*this, Loop);
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	767	}
				768
				769	void BlockFrequencyInfoImplBase::finalizeMetrics() {
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	770	// Unwrap loop packages in reverse post-order, tracking min and max
				771	// frequencies.
				772	auto Min = Float::getLargest();
				773	auto Max = Float::getZero();
				774	for (size_t Index = 0; Index < Working.size(); ++Index) {
Duncan P. N. Exon Smith	46d9a56	2014-04-25 04:38:17 +0000	[diff] [blame]	775	// Update min/max scale.
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	776	Min = std::min(Min, Freqs[Index].Floating);
				777	Max = std::max(Max, Freqs[Index].Floating);
				778	}
				779
				780	// Convert to integers.
				781	convertFloatingToInteger(*this, Min, Max);
				782
				783	// Clean up data structures.
				784	cleanup(*this);
				785
				786	// Print out the final stats.
				787	DEBUG(dump());
				788	}
				789
				790	BlockFrequency
				791	BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
				792	if (!Node.isValid())
				793	return 0;
				794	return Freqs[Node.Index].Integer;
				795	}
				796	Float
				797	BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
				798	if (!Node.isValid())
				799	return Float::getZero();
				800	return Freqs[Node.Index].Floating;
				801	}
				802
				803	std::string
				804	BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
				805	return std::string();
				806	}
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	807	std::string
				808	BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
				809	return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "*" : "");
				810	}
Duncan P. N. Exon Smith	10be9a8	2014-04-21 17:57:07 +0000	[diff] [blame]	811
				812	raw_ostream &
				813	BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
				814	const BlockNode &Node) const {
				815	return OS << getFloatingBlockFreq(Node);
				816	}
				817
				818	raw_ostream &
				819	BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
				820	const BlockFrequency &Freq) const {
				821	Float Block(Freq.getFrequency(), 0);
				822	Float Entry(getEntryFreq(), 0);
				823
				824	return OS << Block / Entry;
				825	}
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	826
				827	void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
				828	Start = OuterLoop.getHeader();
				829	Nodes.reserve(OuterLoop.Nodes.size());
				830	for (auto N : OuterLoop.Nodes)
				831	addNode(N);
				832	indexNodes();
				833	}
				834	void IrreducibleGraph::addNodesInFunction() {
				835	Start = 0;
				836	for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
				837	if (!BFI.Working[Index].isPackaged())
				838	addNode(Index);
				839	indexNodes();
				840	}
				841	void IrreducibleGraph::indexNodes() {
				842	for (auto &I : Nodes)
				843	Lookup[I.Node.Index] = &I;
				844	}
				845	void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
				846	const BFIBase::LoopData *OuterLoop) {
				847	if (OuterLoop && OuterLoop->isHeader(Succ))
				848	return;
				849	auto L = Lookup.find(Succ.Index);
				850	if (L == Lookup.end())
				851	return;
				852	IrrNode &SuccIrr = *L->second;
				853	Irr.Edges.push_back(&SuccIrr);
				854	SuccIrr.Edges.push_front(&Irr);
				855	++SuccIrr.NumIn;
				856	}
				857
				858	namespace llvm {
				859	template <> struct GraphTraits<IrreducibleGraph> {
				860	typedef bfi_detail::IrreducibleGraph GraphT;
				861
Duncan P. N. Exon Smith	295b5e7	2014-04-28 20:22:29 +0000	[diff] [blame]	862	typedef const GraphT::IrrNode NodeType;
				863	typedef GraphT::IrrNode::iterator ChildIteratorType;
Duncan P. N. Exon Smith	c5a3139	2014-04-28 20:02:29 +0000	[diff] [blame]	864
				865	static const NodeType *getEntryNode(const GraphT &G) {
				866	return G.StartIrr;
				867	}
				868	static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
				869	static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
				870	};
				871	}
				872
				873	/// \brief Find extra irreducible headers.
				874	///
				875	/// Find entry blocks and other blocks with backedges, which exist when \c G
				876	/// contains irreducible sub-SCCs.
				877	static void findIrreducibleHeaders(
				878	const BlockFrequencyInfoImplBase &BFI,
				879	const IrreducibleGraph &G,
				880	const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
				881	LoopData::NodeList &Headers, LoopData::NodeList &Others) {
				882	// Map from nodes in the SCC to whether it's an entry block.
				883	SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
				884
				885	// InSCC also acts the set of nodes in the graph. Seed it.
				886	for (const auto *I : SCC)
				887	InSCC[I] = false;
				888
				889	for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
				890	auto &Irr = *I->first;
				891	for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
				892	if (InSCC.count(P))
				893	continue;
				894
				895	// This is an entry block.
				896	I->second = true;
				897	Headers.push_back(Irr.Node);
				898	DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n");
				899	break;
				900	}
				901	}
				902	assert(Headers.size() >= 2 && "Should be irreducible");
				903	if (Headers.size() == InSCC.size()) {
				904	// Every block is a header.
				905	std::sort(Headers.begin(), Headers.end());
				906	return;
				907	}
				908
				909	// Look for extra headers from irreducible sub-SCCs.
				910	for (const auto &I : InSCC) {
				911	// Entry blocks are already headers.
				912	if (I.second)
				913	continue;
				914
				915	auto &Irr = *I.first;
				916	for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
				917	// Skip forward edges.
				918	if (P->Node < Irr.Node)
				919	continue;
				920
				921	// Skip predecessors from entry blocks. These can have inverted
				922	// ordering.
				923	if (InSCC.lookup(P))
				924	continue;
				925
				926	// Store the extra header.
				927	Headers.push_back(Irr.Node);
				928	DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n");
				929	break;
				930	}
				931	if (Headers.back() == Irr.Node)
				932	// Added this as a header.
				933	continue;
				934
				935	// This is not a header.
				936	Others.push_back(Irr.Node);
				937	DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
				938	}
				939	std::sort(Headers.begin(), Headers.end());
				940	std::sort(Others.begin(), Others.end());
				941	}
				942
				943	static void createIrreducibleLoop(
				944	BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
				945	LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
				946	const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
				947	// Translate the SCC into RPO.
				948	DEBUG(dbgs() << " - found-scc\n");
				949
				950	LoopData::NodeList Headers;
				951	LoopData::NodeList Others;
				952	findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
				953
				954	auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
				955	Headers.end(), Others.begin(), Others.end());
				956
				957	// Update loop hierarchy.
				958	for (const auto &N : Loop->Nodes)
				959	if (BFI.Working[N.Index].isLoopHeader())
				960	BFI.Working[N.Index].Loop->Parent = &*Loop;
				961	else
				962	BFI.Working[N.Index].Loop = &*Loop;
				963	}
				964
				965	iterator_range<std::list<LoopData>::iterator>
				966	BlockFrequencyInfoImplBase::analyzeIrreducible(
				967	const IrreducibleGraph &G, LoopData *OuterLoop,
				968	std::list<LoopData>::iterator Insert) {
				969	assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
				970	auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
				971
				972	for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
				973	if (I->size() < 2)
				974	continue;
				975
				976	// Translate the SCC into RPO.
				977	createIrreducibleLoop(this, G, OuterLoop, Insert, I);
				978	}
				979
				980	if (OuterLoop)
				981	return make_range(std::next(Prev), Insert);
				982	return make_range(Loops.begin(), Insert);
				983	}
				984
				985	void
				986	BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
				987	OuterLoop.Exits.clear();
				988	OuterLoop.BackedgeMass = BlockMass::getEmpty();
				989	auto O = OuterLoop.Nodes.begin() + 1;
				990	for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
				991	if (!Working[I->Index].isPackaged())
				992	O++ = I;
				993	OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
				994	}