Blame - llvm/tools/llvm-xray/xray-stacks.cc - toolchain/llvm-project

blob: 63982634576fcef7ba6f5cf397a6240102ff47ac [file] [log] [blame]

Keith Wyss	9420ec3	2017-09-07 18:07:48 +0000	[diff] [blame^]	1	//===- xray-stacks.cc - XRay Function Call Stack Accounting ---------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file implements stack-based accounting. It takes XRay traces, and
				11	// collates statistics across these traces to show a breakdown of time spent
				12	// at various points of the stack to provide insight into which functions
				13	// spend the most time in terms of a call stack. We provide a few
				14	// sorting/filtering options for zero'ing in on the useful stacks.
				15	//
				16	//===----------------------------------------------------------------------===//
				17
				18	#include <forward_list>
				19	#include <numeric>
				20
				21	#include "func-id-helper.h"
				22	#include "xray-registry.h"
				23	#include "llvm/ADT/StringExtras.h"
				24	#include "llvm/Support/CommandLine.h"
				25	#include "llvm/Support/Errc.h"
				26	#include "llvm/Support/ErrorHandling.h"
				27	#include "llvm/Support/FormatAdapters.h"
				28	#include "llvm/Support/FormatVariadic.h"
				29	#include "llvm/XRay/Graph.h"
				30	#include "llvm/XRay/InstrumentationMap.h"
				31	#include "llvm/XRay/Trace.h"
				32
				33	using namespace llvm;
				34	using namespace llvm::xray;
				35
				36	static cl::SubCommand Stack("stack", "Call stack accounting");
				37	static cl::list<std::string> StackInputs(cl::Positional,
				38	cl::desc("<xray trace>"), cl::Required,
				39	cl::sub(Stack), cl::OneOrMore);
				40
				41	static cl::opt<bool>
				42	StackKeepGoing("keep-going", cl::desc("Keep going on errors encountered"),
				43	cl::sub(Stack), cl::init(false));
				44	static cl::alias StackKeepGoing2("k", cl::aliasopt(StackKeepGoing),
				45	cl::desc("Alias for -keep-going"),
				46	cl::sub(Stack));
				47
				48	// TODO: Does there need to be an option to deduce tail or sibling calls?
				49
				50	static cl::opt<std::string> StacksInstrMap(
				51	"instr_map",
				52	cl::desc("instrumentation map used to identify function ids. "
				53	"Currently supports elf file instrumentation maps."),
				54	cl::sub(Stack), cl::init(""));
				55	static cl::alias StacksInstrMap2("m", cl::aliasopt(StacksInstrMap),
				56	cl::desc("Alias for -instr_map"),
				57	cl::sub(Stack));
				58
				59	static cl::opt<bool>
				60	SeparateThreadStacks("per-thread-stacks",
				61	cl::desc("Report top stacks within each thread id"),
				62	cl::sub(Stack), cl::init(false));
				63
				64	static cl::opt<bool>
				65	AggregateThreads("aggregate-threads",
				66	cl::desc("Aggregate stack times across threads"),
				67	cl::sub(Stack), cl::init(false));
				68
				69	/// A helper struct to work with formatv and XRayRecords. Makes it easier to use
				70	/// instrumentation map names or addresses in formatted output.
				71	struct format_xray_record : public FormatAdapter<XRayRecord> {
				72	explicit format_xray_record(XRayRecord record,
				73	const FuncIdConversionHelper &conv)
				74	: FormatAdapter<XRayRecord>(std::move(record)), Converter(&conv) {}
				75	void format(raw_ostream &Stream, StringRef Style) override {
				76	Stream << formatv(
				77	"{FuncId: \"{0}\", ThreadId: \"{1}\", RecordType: \"{2}\"}",
				78	Converter->SymbolOrNumber(Item.FuncId), Item.TId,
				79	DecodeRecordType(Item.RecordType));
				80	}
				81
				82	private:
				83	Twine DecodeRecordType(uint16_t recordType) {
				84	switch (recordType) {
				85	case 0:
				86	return Twine("Fn Entry");
				87	case 1:
				88	return Twine("Fn Exit");
				89	default:
				90	// TODO: Add Tail exit when it is added to llvm/XRay/XRayRecord.h
				91	return Twine("Unknown");
				92	}
				93	}
				94
				95	const FuncIdConversionHelper *Converter;
				96	};
				97
				98	/// The stack command will take a set of XRay traces as arguments, and collects
				99	/// information about the stacks of instrumented functions that appear in the
				100	/// traces. We track the following pieces of information:
				101	///
				102	/// - Total time: amount of time/cycles accounted for in the traces.
				103	/// - Stack count: number of times a specific stack appears in the
				104	/// traces. Only instrumented functions show up in stacks.
				105	/// - Cumulative stack time: amount of time spent in a stack accumulated
				106	/// across the invocations in the traces.
				107	/// - Cumulative local time: amount of time spent in each instrumented
				108	/// function showing up in a specific stack, accumulated across the traces.
				109	///
				110	/// Example output for the kind of data we'd like to provide looks like the
				111	/// following:
				112	///
				113	/// Total time: 3.33234 s
				114	/// Stack ID: ...
				115	/// Stack Count: 2093
				116	/// # Function Local Time (%) Stack Time (%)
				117	/// 0 main 2.34 ms 0.07% 3.33234 s 100%
				118	/// 1 foo() 3.30000 s 99.02% 3.33 s 99.92%
				119	/// 2 bar() 30 ms 0.90% 30 ms 0.90%
				120	///
				121	/// We can also show distributions of the function call durations with
				122	/// statistics at each level of the stack. This works by doing the following
				123	/// algorithm:
				124	///
				125	/// 1. When unwinding, record the duration of each unwound function associated
				126	/// with the path up to which the unwinding stops. For example:
				127	///
				128	/// Step Duration (? means has start time)
				129	///
				130	/// push a <start time> a = ?
				131	/// push b <start time> a = ?, a->b = ?
				132	/// push c <start time> a = ?, a->b = ?, a->b->c = ?
				133	/// pop c <end time> a = ?, a->b = ?, emit duration(a->b->c)
				134	/// pop b <end time> a = ?, emit duration(a->b)
				135	/// push c <start time> a = ?, a->c = ?
				136	/// pop c <end time> a = ?, emit duration(a->c)
				137	/// pop a <end time> emit duration(a)
				138	///
				139	/// 2. We then account for the various stacks we've collected, and for each of
				140	/// them will have measurements that look like the following (continuing
				141	/// with the above simple example):
				142	///
				143	/// c : [<id("a->b->c"), [durations]>, <id("a->c"), [durations]>]
				144	/// b : [<id("a->b"), [durations]>]
				145	/// a : [<id("a"), [durations]>]
				146	///
				147	/// This allows us to compute, for each stack id, and each function that
				148	/// shows up in the stack, some important statistics like:
				149	///
				150	/// - median
				151	/// - 99th percentile
				152	/// - mean + stddev
				153	/// - count
				154	///
				155	/// 3. For cases where we don't have durations for some of the higher levels
				156	/// of the stack (perhaps instrumentation wasn't activated when the stack was
				157	/// entered), we can mark them appropriately.
				158	///
				159	/// Computing this data also allows us to implement lookup by call stack nodes,
				160	/// so that we can find functions that show up in multiple stack traces and
				161	/// show the statistical properties of that function in various contexts. We
				162	/// can compute information similar to the following:
				163	///
				164	/// Function: 'c'
				165	/// Stacks: 2 / 2
				166	/// Stack ID: ...
				167	/// Stack Count: ...
				168	/// # Function ...
				169	/// 0 a ...
				170	/// 1 b ...
				171	/// 2 c ...
				172	///
				173	/// Stack ID: ...
				174	/// Stack Count: ...
				175	/// # Function ...
				176	/// 0 a ...
				177	/// 1 c ...
				178	/// ----------------...
				179	///
				180	/// Function: 'b'
				181	/// Stacks: 1 / 2
				182	/// Stack ID: ...
				183	/// Stack Count: ...
				184	/// # Function ...
				185	/// 0 a ...
				186	/// 1 b ...
				187	/// 2 c ...
				188	///
				189	///
				190	/// To do this we require a Trie data structure that will allow us to represent
				191	/// all the call stacks of instrumented functions in an easily traversible
				192	/// manner when we do the aggregations and lookups. For instrumented call
				193	/// sequences like the following:
				194	///
				195	/// a()
				196	/// b()
				197	/// c()
				198	/// d()
				199	/// c()
				200	///
				201	/// We will have a representation like so:
				202	///
				203	/// a -> b -> c
				204	/// \| \|
				205	/// \| +--> d
				206	/// \|
				207	/// +--> c
				208	///
				209	/// We maintain a sequence of durations on the leaves and in the internal nodes
				210	/// as we go through and process every record from the XRay trace. We also
				211	/// maintain an index of unique functions, and provide a means of iterating
				212	/// through all the instrumented call stacks which we know about.
				213
				214	struct TrieNode {
				215	int32_t FuncId;
				216	TrieNode *Parent;
				217	SmallVector<TrieNode *, 4> Callees;
				218	// Separate durations depending on whether the node is the deepest node in the
				219	// stack.
				220	SmallVector<int64_t, 4> TerminalDurations;
				221	SmallVector<int64_t, 4> IntermediateDurations;
				222	};
				223
				224	/// Merges together two TrieNodes with like function ids, aggregating their
				225	/// callee lists and durations. The caller must provide storage where new merged
				226	/// nodes can be allocated in the form of a linked list.
				227	TrieNode *mergeTrieNodes(const TrieNode &Left, const TrieNode &Right,
				228	TrieNode *NewParent,
				229	std::forward_list<TrieNode> &NodeStore) {
				230	assert(Left.FuncId == Right.FuncId);
				231	NodeStore.push_front(TrieNode{Left.FuncId, NewParent, {}, {}, {}});
				232	auto I = NodeStore.begin();
				233	auto Node = &I;
				234
				235	// Build a map of callees from the left side.
				236	DenseMap<int32_t, TrieNode *> LeftCalleesByFuncId;
				237	for (auto *Callee : Left.Callees) {
				238	LeftCalleesByFuncId[Callee->FuncId] = Callee;
				239	}
				240
				241	// Iterate through the right side, either merging with the map values or
				242	// directly adding to the Callees vector. The iteration also removes any
				243	// merged values from the left side map.
				244	for (auto *Callee : Right.Callees) {
				245	auto iter = LeftCalleesByFuncId.find(Callee->FuncId);
				246	if (iter != LeftCalleesByFuncId.end()) {
				247	Node->Callees.push_back(
				248	mergeTrieNodes((iter->second), Callee, Node, NodeStore));
				249	LeftCalleesByFuncId.erase(iter);
				250	} else {
				251	Node->Callees.push_back(Callee);
				252	}
				253	}
				254
				255	// Add any callees that weren't found in the right side.
				256	for (auto MapPairIter : LeftCalleesByFuncId) {
				257	Node->Callees.push_back(MapPairIter.second);
				258	}
				259
				260	// Aggregate the durations.
				261	for (auto duration : Left.TerminalDurations) {
				262	Node->TerminalDurations.push_back(duration);
				263	}
				264	for (auto duration : Right.TerminalDurations) {
				265	Node->TerminalDurations.push_back(duration);
				266	}
				267	for (auto duration : Left.IntermediateDurations) {
				268	Node->IntermediateDurations.push_back(duration);
				269	}
				270	for (auto duration : Right.IntermediateDurations) {
				271	Node->IntermediateDurations.push_back(duration);
				272	}
				273
				274	return Node;
				275	}
				276
				277	class StackTrie {
				278
				279	// We maintain pointers to the roots of the tries we see.
				280	DenseMap<uint32_t, SmallVector<TrieNode *, 4>> Roots;
				281
				282	// We make sure all the nodes are accounted for in this list.
				283	std::forward_list<TrieNode> NodeStore;
				284
				285	// A map of thread ids to pairs call stack trie nodes and their start times.
				286	DenseMap<uint32_t, SmallVector<std::pair<TrieNode *, uint64_t>, 8>>
				287	ThreadStackMap;
				288
				289	TrieNode *createTrieNode(uint32_t ThreadId, int32_t FuncId,
				290	TrieNode *Parent) {
				291	NodeStore.push_front(TrieNode{FuncId, Parent, {}, {}, {}});
				292	auto I = NodeStore.begin();
				293	auto Node = &I;
				294	if (!Parent)
				295	Roots[ThreadId].push_back(Node);
				296	return Node;
				297	}
				298
				299	TrieNode *findRootNode(uint32_t ThreadId, int32_t FuncId) {
				300	const auto &RootsByThread = Roots[ThreadId];
				301	auto I = find_if(RootsByThread,
				302	[&](TrieNode *N) { return N->FuncId == FuncId; });
				303	return (I == RootsByThread.end()) ? nullptr : *I;
				304	}
				305
				306	public:
				307	enum class AccountRecordStatus {
				308	OK, // Successfully processed
				309	ENTRY_NOT_FOUND, // An exit record had no matching call stack entry
				310	UNKNOWN_RECORD_TYPE
				311	};
				312
				313	struct AccountRecordState {
				314	// We keep track of whether the call stack is currently unwinding.
				315	bool wasLastRecordExit;
				316
				317	static AccountRecordState CreateInitialState() { return {false}; }
				318	};
				319
				320	AccountRecordStatus accountRecord(const XRayRecord &R,
				321	AccountRecordState *state) {
				322	auto &TS = ThreadStackMap[R.TId];
				323	switch (R.Type) {
				324	case RecordTypes::ENTER: {
				325	state->wasLastRecordExit = false;
				326	// When we encounter a new function entry, we want to record the TSC for
				327	// that entry, and the function id. Before doing so we check the top of
				328	// the stack to see if there are callees that already represent this
				329	// function.
				330	if (TS.empty()) {
				331	auto *Root = findRootNode(R.TId, R.FuncId);
				332	TS.emplace_back(Root ? Root : createTrieNode(R.TId, R.FuncId, nullptr),
				333	R.TSC);
				334	return AccountRecordStatus::OK;
				335	}
				336
				337	auto &Top = TS.back();
				338	auto I = find_if(Top.first->Callees,
				339	[&](TrieNode *N) { return N->FuncId == R.FuncId; });
				340	if (I == Top.first->Callees.end()) {
				341	// We didn't find the callee in the stack trie, so we're going to
				342	// add to the stack then set up the pointers properly.
				343	auto N = createTrieNode(R.TId, R.FuncId, Top.first);
				344	Top.first->Callees.emplace_back(N);
				345
				346	// Top may be invalidated after this statement.
				347	TS.emplace_back(N, R.TSC);
				348	} else {
				349	// We found the callee in the stack trie, so we'll use that pointer
				350	// instead, add it to the stack associated with the TSC.
				351	TS.emplace_back(*I, R.TSC);
				352	}
				353	return AccountRecordStatus::OK;
				354	}
				355	case RecordTypes::EXIT: {
				356	bool wasLastRecordExit = state->wasLastRecordExit;
				357	state->wasLastRecordExit = true;
				358	// The exit case is more interesting, since we want to be able to deduce
				359	// missing exit records. To do that properly, we need to look up the stack
				360	// and see whether the exit record matches any of the entry records. If it
				361	// does match, we attempt to record the durations as we pop the stack to
				362	// where we see the parent.
				363	if (TS.empty()) {
				364	// Short circuit, and say we can't find it.
				365
				366	return AccountRecordStatus::ENTRY_NOT_FOUND;
				367	}
				368
				369	auto FunctionEntryMatch =
				370	find_if(reverse(TS), [&](const std::pair<TrieNode *, uint64_t> &E) {
				371	return E.first->FuncId == R.FuncId;
				372	});
				373	auto status = AccountRecordStatus::OK;
				374	if (FunctionEntryMatch == TS.rend()) {
				375	status = AccountRecordStatus::ENTRY_NOT_FOUND;
				376	} else {
				377	// Account for offset of 1 between reverse and forward iterators. We
				378	// want the forward iterator to include the function that is exited.
				379	++FunctionEntryMatch;
				380	}
				381	auto I = FunctionEntryMatch.base();
				382	for (auto &E : make_range(I, TS.end() - 1))
				383	E.first->IntermediateDurations.push_back(std::max(E.second, R.TSC) -
				384	std::min(E.second, R.TSC));
				385	auto &Deepest = TS.back();
				386	if (wasLastRecordExit)
				387	Deepest.first->IntermediateDurations.push_back(
				388	std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC));
				389	else
				390	Deepest.first->TerminalDurations.push_back(
				391	std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC));
				392	TS.erase(I, TS.end());
				393	return status;
				394	}
				395	}
				396	return AccountRecordStatus::UNKNOWN_RECORD_TYPE;
				397	}
				398
				399	bool isEmpty() const { return Roots.empty(); }
				400
				401	void printStack(raw_ostream &OS, const TrieNode *Top,
				402	FuncIdConversionHelper &FN) {
				403	// Traverse the pointers up to the parent, noting the sums, then print
				404	// in reverse order (callers at top, callees down bottom).
				405	SmallVector<const TrieNode *, 8> CurrentStack;
				406	for (auto *F = Top; F != nullptr; F = F->Parent)
				407	CurrentStack.push_back(F);
				408	int Level = 0;
				409	OS << formatv("{0,-5} {1,-60} {2,+12} {3,+16}\n", "lvl", "function",
				410	"count", "sum");
				411	for (auto *F :
				412	reverse(make_range(CurrentStack.begin() + 1, CurrentStack.end()))) {
				413	auto Sum = std::accumulate(F->IntermediateDurations.begin(),
				414	F->IntermediateDurations.end(), 0LL);
				415	auto FuncId = FN.SymbolOrNumber(F->FuncId);
				416	OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++,
				417	FuncId.size() > 60 ? FuncId.substr(0, 57) + "..." : FuncId,
				418	F->IntermediateDurations.size(), Sum);
				419	}
				420	auto Leaf = CurrentStack.begin();
				421	auto LeafSum = std::accumulate(Leaf->TerminalDurations.begin(),
				422	Leaf->TerminalDurations.end(), 0LL);
				423	auto LeafFuncId = FN.SymbolOrNumber(Leaf->FuncId);
				424	OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++,
				425	LeafFuncId.size() > 60 ? LeafFuncId.substr(0, 57) + "..."
				426	: LeafFuncId,
				427	Leaf->TerminalDurations.size(), LeafSum);
				428	OS << "\n";
				429	}
				430
				431	/// Prints top stacks for each thread.
				432	void printPerThread(raw_ostream &OS, FuncIdConversionHelper &FN) {
				433	for (auto iter : Roots) {
				434	OS << "Thread " << iter.first << ":\n";
				435	print(OS, FN, iter.second);
				436	OS << "\n";
				437	}
				438	}
				439
				440	/// Prints top stacks from looking at all the leaves and ignoring thread IDs.
				441	/// Stacks that consist of the same function IDs but were called in different
				442	/// thread IDs are not considered unique in this printout.
				443	void printIgnoringThreads(raw_ostream &OS, FuncIdConversionHelper &FN) {
				444	SmallVector<TrieNode *, 4> RootValues;
				445
				446	// Function to pull the values out of a map iterator.
				447	using RootsType = decltype(Roots.begin())::value_type;
				448	auto MapValueFn = [](const RootsType &Value) { return Value.second; };
				449
				450	for (const auto &RootNodeRange :
				451	make_range(map_iterator(Roots.begin(), MapValueFn),
				452	map_iterator(Roots.end(), MapValueFn))) {
				453	for (auto *RootNode : RootNodeRange)
				454	RootValues.push_back(RootNode);
				455	}
				456
				457	print(OS, FN, RootValues);
				458	}
				459
				460	/// Merges the trie by thread id before printing top stacks.
				461	void printAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN) {
				462	std::forward_list<TrieNode> AggregatedNodeStore;
				463	SmallVector<TrieNode *, 4> RootValues;
				464	for (auto MapIter : Roots) {
				465	const auto &RootNodeVector = MapIter.second;
				466	for (auto *Node : RootNodeVector) {
				467	auto MaybeFoundIter = find_if(RootValues, [Node](TrieNode *elem) {
				468	return Node->FuncId == elem->FuncId;
				469	});
				470	if (MaybeFoundIter == RootValues.end()) {
				471	RootValues.push_back(Node);
				472	} else {
				473	RootValues.push_back(mergeTrieNodes(*MaybeFoundIter, Node, nullptr,
				474	AggregatedNodeStore));
				475	RootValues.erase(MaybeFoundIter);
				476	}
				477	}
				478	}
				479	print(OS, FN, RootValues);
				480	}
				481
				482	void print(raw_ostream &OS, FuncIdConversionHelper &FN,
				483	SmallVector<TrieNode *, 4> RootValues) {
				484	// Go through each of the roots, and traverse the call stack, producing the
				485	// aggregates as you go along. Remember these aggregates and stacks, and
				486	// show summary statistics about:
				487	//
				488	// - Total number of unique stacks
				489	// - Top 10 stacks by count
				490	// - Top 10 stacks by aggregate duration
				491	SmallVector<std::pair<const TrieNode *, uint64_t>, 11> TopStacksByCount;
				492	SmallVector<std::pair<const TrieNode *, uint64_t>, 11> TopStacksBySum;
				493	auto greater_second = [](const std::pair<const TrieNode *, uint64_t> &A,
				494	const std::pair<const TrieNode *, uint64_t> &B) {
				495	return A.second > B.second;
				496	};
				497	uint64_t UniqueStacks = 0;
				498	for (const auto *N : RootValues) {
				499	SmallVector<const TrieNode *, 16> S;
				500	S.emplace_back(N);
				501
				502	while (!S.empty()) {
				503	auto Top = S.pop_back_val();
				504
				505	// We only start printing the stack (by walking up the parent pointers)
				506	// when we get to a leaf function.
				507	if (!Top->TerminalDurations.empty()) {
				508	++UniqueStacks;
				509	auto TopSum = std::accumulate(Top->TerminalDurations.begin(),
				510	Top->TerminalDurations.end(), 0uLL);
				511	{
				512	auto E = std::make_pair(Top, TopSum);
				513	TopStacksBySum.insert(std::lower_bound(TopStacksBySum.begin(),
				514	TopStacksBySum.end(), E,
				515	greater_second),
				516	E);
				517	if (TopStacksBySum.size() == 11)
				518	TopStacksBySum.pop_back();
				519	}
				520	{
				521	auto E = std::make_pair(Top, Top->TerminalDurations.size());
				522	TopStacksByCount.insert(std::lower_bound(TopStacksByCount.begin(),
				523	TopStacksByCount.end(), E,
				524	greater_second),
				525	E);
				526	if (TopStacksByCount.size() == 11)
				527	TopStacksByCount.pop_back();
				528	}
				529	}
				530	for (const auto *C : Top->Callees)
				531	S.push_back(C);
				532	}
				533	}
				534
				535	// Now print the statistics in the end.
				536	OS << "\n";
				537	OS << "Unique Stacks: " << UniqueStacks << "\n";
				538	OS << "Top 10 Stacks by leaf sum:\n\n";
				539	for (const auto &P : TopStacksBySum) {
				540	OS << "Sum: " << P.second << "\n";
				541	printStack(OS, P.first, FN);
				542	}
				543	OS << "\n";
				544	OS << "Top 10 Stacks by leaf count:\n\n";
				545	for (const auto &P : TopStacksByCount) {
				546	OS << "Count: " << P.second << "\n";
				547	printStack(OS, P.first, FN);
				548	}
				549	OS << "\n";
				550	}
				551	};
				552
				553	std::string CreateErrorMessage(StackTrie::AccountRecordStatus Error,
				554	const XRayRecord &Record,
				555	const FuncIdConversionHelper &Converter) {
				556	switch (Error) {
				557	case StackTrie::AccountRecordStatus::ENTRY_NOT_FOUND:
				558	return formatv("Found record {0} with no matching function entry\n",
				559	format_xray_record(Record, Converter));
				560	default:
				561	return formatv("Unknown error type for record {0}\n",
				562	format_xray_record(Record, Converter));
				563	}
				564	}
				565
				566	static CommandRegistration Unused(&Stack, []() -> Error {
				567	// Load each file provided as a command-line argument. For each one of them
				568	// account to a single StackTrie, and just print the whole trie for now.
				569	StackTrie ST;
				570	InstrumentationMap Map;
				571	if (!StacksInstrMap.empty()) {
				572	auto InstrumentationMapOrError = loadInstrumentationMap(StacksInstrMap);
				573	if (!InstrumentationMapOrError)
				574	return joinErrors(
				575	make_error<StringError>(
				576	Twine("Cannot open instrumentation map: ") + StacksInstrMap,
				577	std::make_error_code(std::errc::invalid_argument)),
				578	InstrumentationMapOrError.takeError());
				579	Map = std::move(*InstrumentationMapOrError);
				580	}
				581
				582	if (SeparateThreadStacks && AggregateThreads)
				583	return make_error<StringError>(
				584	Twine("Can't specify options for per thread reporting and reporting "
				585	"that aggregates threads."),
				586	std::make_error_code(std::errc::invalid_argument));
				587
				588	symbolize::LLVMSymbolizer::Options Opts(
				589	symbolize::FunctionNameKind::LinkageName, true, true, false, "");
				590	symbolize::LLVMSymbolizer Symbolizer(Opts);
				591	FuncIdConversionHelper FuncIdHelper(StacksInstrMap, Symbolizer,
				592	Map.getFunctionAddresses());
				593	// TODO: Someday, support output to files instead of just directly to
				594	// standard output.
				595	for (const auto &Filename : StackInputs) {
				596	auto TraceOrErr = loadTraceFile(Filename);
				597	if (!TraceOrErr) {
				598	if (!StackKeepGoing)
				599	return joinErrors(
				600	make_error<StringError>(
				601	Twine("Failed loading input file '") + Filename + "'",
				602	std::make_error_code(std::errc::invalid_argument)),
				603	TraceOrErr.takeError());
				604	logAllUnhandledErrors(TraceOrErr.takeError(), errs(), "");
				605	continue;
				606	}
				607	auto &T = *TraceOrErr;
				608	StackTrie::AccountRecordState AccountRecordState =
				609	StackTrie::AccountRecordState::CreateInitialState();
				610	for (const auto &Record : T) {
				611	auto error = ST.accountRecord(Record, &AccountRecordState);
				612	if (error != StackTrie::AccountRecordStatus::OK) {
				613	if (!StackKeepGoing)
				614	return make_error<StringError>(
				615	CreateErrorMessage(error, Record, FuncIdHelper),
				616	make_error_code(errc::illegal_byte_sequence));
				617	errs() << CreateErrorMessage(error, Record, FuncIdHelper);
				618	}
				619	}
				620	}
				621	if (ST.isEmpty()) {
				622	return make_error<StringError>(
				623	"No instrumented calls were accounted in the input file.",
				624	make_error_code(errc::result_out_of_range));
				625	}
				626	if (AggregateThreads) {
				627	ST.printAggregatingThreads(outs(), FuncIdHelper);
				628	} else if (SeparateThreadStacks) {
				629	ST.printPerThread(outs(), FuncIdHelper);
				630	} else {
				631	ST.printIgnoringThreads(outs(), FuncIdHelper);
				632	}
				633	return Error::success();
				634	});