//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_profiler_flags.h"
#include "xray_segmented_array.h"
#include <utility>
#include <new> // For placement new.

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function
/// and the path from the root to the node represents its stack trace. Each
/// node in the trie will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented
/// using an Array<NodeIdPair> -- each NodeIdPair instance will contain the
/// function ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach as it takes a lot of time to compute the
/// hash every time we need to update a function's call information while
/// handling the entry and exit events.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact
/// and quick to update) to enable us to keep the overheads of this
/// implementation to a minimum.
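///
/// A minimal usage sketch (the call sites and TSC values below are made up
/// for illustration; in the runtime they come from the XRay event handlers):
///
///   auto Allocators = FunctionCallTrie::InitAllocators();
///   FunctionCallTrie Trie(Allocators);
///   Trie.enterFunction(/*FId=*/1, /*TSC=*/100); // push f
///   Trie.enterFunction(/*FId=*/2, /*TSC=*/110); // push g, a callee of f
///   Trie.exitFunction(/*FId=*/2, /*TSC=*/150);  // pop g, accumulate time
///   Trie.exitFunction(/*FId=*/1, /*TSC=*/200);  // pop f, accumulate time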
class FunctionCallTrie {
public:
  struct Node;

  // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
  // standard library types in this header.
  struct NodeIdPair {
    Node *NodePtr;
    int32_t FId;

    // Constructor for inplace-construction.
    NodeIdPair(Node *N, int32_t F) : NodePtr(N), FId(F) {}
  };

  using NodeIdPairArray = Array<NodeIdPair>;
  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;

  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
  // number of times this node actually appeared, the cumulative amount of
  // local time spent in this particular node, and the ID of the XRay
  // instrumented function that it is associated with.
  struct Node {
    Node *Parent;
    NodeIdPairArray Callees;
    int64_t CallCount;
    int64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
    int32_t FId;

    // We add a constructor here to allow us to inplace-construct through
    // Array<...>'s AppendEmplace.
    Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT,
         int32_t F)
        : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT),
          FId(F) {}

    // TODO: Include the compact histogram.
  };

private:
  struct ShadowStackEntry {
    int32_t FId; // We're copying the function ID into the stack to avoid
                 // having to reach into the node just to get the function ID.
    uint64_t EntryTSC;
    Node *NodePtr;

    // We add a constructor here to allow us to inplace-construct through
    // Array<...>'s AppendEmplace.
    ShadowStackEntry(int32_t F, uint64_t T, Node *N)
        : FId(F), EntryTSC(T), NodePtr(N) {}
  };

  using NodeArray = Array<Node>;
  using RootArray = Array<Node *>;
  using ShadowStackArray = Array<ShadowStackEntry>;

public:
  // We collate the allocators we need into a single struct, as a convenience
  // to allow us to initialize these as a group.
  struct Allocators {
    using NodeAllocatorType = NodeArray::AllocatorType;
    using RootAllocatorType = RootArray::AllocatorType;
    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;

    NodeAllocatorType *NodeAllocator = nullptr;
    RootAllocatorType *RootAllocator = nullptr;
    ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

    Allocators() {}
    Allocators(const Allocators &) = delete;
    Allocators &operator=(const Allocators &) = delete;

    Allocators(Allocators &&O)
        : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator),
          ShadowStackAllocator(O.ShadowStackAllocator),
          NodeIdPairAllocator(O.NodeIdPairAllocator) {
      O.NodeAllocator = nullptr;
      O.RootAllocator = nullptr;
      O.ShadowStackAllocator = nullptr;
      O.NodeIdPairAllocator = nullptr;
    }

    Allocators &operator=(Allocators &&O) {
      {
        auto Tmp = O.NodeAllocator;
        O.NodeAllocator = this->NodeAllocator;
        this->NodeAllocator = Tmp;
      }
      {
        auto Tmp = O.RootAllocator;
        O.RootAllocator = this->RootAllocator;
        this->RootAllocator = Tmp;
      }
      {
        auto Tmp = O.ShadowStackAllocator;
        O.ShadowStackAllocator = this->ShadowStackAllocator;
        this->ShadowStackAllocator = Tmp;
      }
      {
        auto Tmp = O.NodeIdPairAllocator;
        O.NodeIdPairAllocator = this->NodeIdPairAllocator;
        this->NodeIdPairAllocator = Tmp;
      }
      return *this;
    }

    ~Allocators() {
      // Note that we cannot use delete on these pointers, as they need to be
      // returned to the sanitizer_common library's internal memory tracking
      // system.
      if (NodeAllocator != nullptr) {
        NodeAllocator->~NodeAllocatorType();
        InternalFree(NodeAllocator);
      }
      if (RootAllocator != nullptr) {
        RootAllocator->~RootAllocatorType();
        InternalFree(RootAllocator);
      }
      if (ShadowStackAllocator != nullptr) {
        ShadowStackAllocator->~ShadowStackAllocatorType();
        InternalFree(ShadowStackAllocator);
      }
      if (NodeIdPairAllocator != nullptr) {
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
        InternalFree(NodeIdPairAllocator);
      }
    }
  };

  // TODO: Support configuration of options through the arguments.
  static Allocators InitAllocators() {
    Allocators A;
    auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>(
        InternalAlloc(sizeof(Allocators::NodeAllocatorType)));
    new (NodeAllocator) Allocators::NodeAllocatorType(
        profilerFlags()->per_thread_allocator_max, 0);
    A.NodeAllocator = NodeAllocator;

    auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>(
        InternalAlloc(sizeof(Allocators::RootAllocatorType)));
    new (RootAllocator) Allocators::RootAllocatorType(
        profilerFlags()->per_thread_allocator_max, 0);
    A.RootAllocator = RootAllocator;

    auto ShadowStackAllocator =
        reinterpret_cast<Allocators::ShadowStackAllocatorType *>(
            InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType)));
    new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(
        profilerFlags()->per_thread_allocator_max, 0);
    A.ShadowStackAllocator = ShadowStackAllocator;

    auto NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
        InternalAlloc(sizeof(NodeIdPairAllocatorType)));
    new (NodeIdPairAllocator)
        NodeIdPairAllocatorType(profilerFlags()->per_thread_allocator_max, 0);
    A.NodeIdPairAllocator = NodeIdPairAllocator;
    return A;
  }
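
  // A minimal usage sketch, assuming the profiler flags have been initialized
  // elsewhere (the call sites below are hypothetical):
  //
  //   auto Allocators = FunctionCallTrie::InitAllocators();
  //   FunctionCallTrie Trie(Allocators);
  //
  // The Allocators instance is move-only: moving it transfers ownership of
  // the underlying allocator pointers, and the destructor of the final owner
  // returns them through InternalFree. A FunctionCallTrie holds references
  // into these allocators, so it must not outlive the Allocators instance it
  // was constructed from.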

private:
  NodeArray Nodes;
  RootArray Roots;
  ShadowStackArray ShadowStack;
  NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

  const Allocators &GetGlobalAllocators() {
    static const Allocators A = [] { return InitAllocators(); }();
    return A;
  }

public:
  explicit FunctionCallTrie(const Allocators &A)
      : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
        ShadowStack(*A.ShadowStackAllocator),
        NodeIdPairAllocator(A.NodeIdPairAllocator) {}

  FunctionCallTrie() : FunctionCallTrie(GetGlobalAllocators()) {}

  void enterFunction(int32_t FId, uint64_t TSC) {
    // This function primarily deals with ensuring that the ShadowStack is
    // consistent and ready for when an exit event is encountered.
    if (UNLIKELY(ShadowStack.empty())) {
      auto NewRoot =
          Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0, 0, FId);
      if (UNLIKELY(NewRoot == nullptr))
        return;
      Roots.Append(NewRoot);
      ShadowStack.AppendEmplace(FId, TSC, NewRoot);
      return;
    }

    auto &Top = ShadowStack.back();
    auto TopNode = Top.NodePtr;

    // If we've seen this callee before, we just access that node and place it
    // on top of the stack.
    auto Callee = TopNode->Callees.find_element(
        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
    if (Callee != nullptr) {
      CHECK_NE(Callee->NodePtr, nullptr);
      ShadowStack.AppendEmplace(FId, TSC, Callee->NodePtr);
      return;
    }

    // This means we've never seen this stack before; create a new node here.
    auto NewNode =
        Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId);
    if (UNLIKELY(NewNode == nullptr))
      return;
    TopNode->Callees.AppendEmplace(NewNode, FId);
    ShadowStack.AppendEmplace(FId, TSC, NewNode);
    return;
  }
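
  // As an illustrative sketch (function IDs and TSC values are made up), a
  // sequence of entry events like:
  //
  //   enterFunction(/*FId=*/1, /*TSC=*/100); // f
  //   enterFunction(/*FId=*/2, /*TSC=*/110); // f -> g
  //   enterFunction(/*FId=*/3, /*TSC=*/120); // f -> g -> j
  //
  // leaves the trie with a single root (f), whose Callees contain g, whose
  // Callees in turn contain j, and the shadow stack holding the entries
  // [(1, 100), (2, 110), (3, 120)] from bottom to top.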

  void exitFunction(int32_t FId, uint64_t TSC) {
    // When we exit a function, we look up the ShadowStack to see whether we've
    // entered this function before. We do as little processing here as we can,
    // since most of the hard work would have already been done at function
    // entry.
    if (UNLIKELY(ShadowStack.empty()))
      return;

    uint64_t CumulativeTreeTime = 0;
    while (!ShadowStack.empty()) {
      auto &Top = ShadowStack.back();
      auto TopNode = Top.NodePtr;
      auto TopFId = TopNode->FId;
      auto LocalTime = TSC - Top.EntryTSC;
      TopNode->CallCount++;
      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
      CumulativeTreeTime += LocalTime;
      ShadowStack.trim(1);

      // TODO: Update the histogram for the node.
      if (TopFId == FId)
        break;
    }
  }
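
  // A worked example of the accounting above, with made-up TSC values: given
  // enterFunction(1, 100) then enterFunction(2, 110), a single call to
  // exitFunction(1, 200) unwinds both frames. First the frame for FId 2 is
  // popped: LocalTime = 200 - 110 = 90, and its node accumulates 90 - 0 = 90.
  // Then the frame for FId 1 is popped: LocalTime = 200 - 100 = 100, but its
  // node only accumulates 100 - 90 = 10, since CumulativeTreeTime already
  // accounts for the 90 ticks attributed to the deeper frame in this unwind.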

  const RootArray &getRoots() const { return Roots; }

  // The deepCopyInto operation will update the provided FunctionCallTrie by
  // re-creating the contents of this particular FunctionCallTrie in the other
  // FunctionCallTrie. It does this with a depth-first traversal from the
  // roots, recreating each node it visits in the provided FunctionCallTrie.
  //
  // This operation will *not* clear the state in |O|, and thus may cause
  // duplicate entries in |O| if it is not empty.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  //
  // This function must *not* be called with a non-empty FunctionCallTrie |O|.
  void deepCopyInto(FunctionCallTrie &O) const {
    DCHECK(O.getRoots().empty());
    for (const auto Root : getRoots()) {
      // Add a node in O for this root.
      auto NewRoot = O.Nodes.AppendEmplace(
          nullptr, *O.NodeIdPairAllocator, Root->CallCount,
          Root->CumulativeLocalTime, Root->FId);
      O.Roots.Append(NewRoot);

      // We then push the root into a stack, to use as the parent marker for
      // new nodes we push in as we're traversing depth-first down the call
      // tree.
      struct NodeAndParent {
        FunctionCallTrie::Node *Node;
        FunctionCallTrie::Node *NewNode;
      };
      using Stack = Array<NodeAndParent>;

      typename Stack::AllocatorType StackAllocator(
          profilerFlags()->stack_allocator_max, 0);
      Stack DFSStack(StackAllocator);

      // TODO: Figure out what to do if we fail to allocate any more stack
      // space. Maybe warn or report once?
      DFSStack.Append(NodeAndParent{Root, NewRoot});
      while (!DFSStack.empty()) {
        NodeAndParent NP = DFSStack.back();
        DCHECK_NE(NP.Node, nullptr);
        DCHECK_NE(NP.NewNode, nullptr);
        DFSStack.trim(1);
        for (const auto Callee : NP.Node->Callees) {
          auto NewNode = O.Nodes.AppendEmplace(
              NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
              Callee.NodePtr->CumulativeLocalTime, Callee.FId);
          DCHECK_NE(NewNode, nullptr);
          NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
          DFSStack.Append(NodeAndParent{Callee.NodePtr, NewNode});
        }
      }
    }
  }
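
  // A sketch of the intended use, assuming a collection step that copies a
  // thread's trie into a buffer backed by separate allocators (the names
  // below are hypothetical):
  //
  //   FunctionCallTrie Buffer(BufferAllocators);
  //   ThreadLocalTrie.deepCopyInto(Buffer); // Buffer must start out empty.
  //
  // Because the copy allocates all nodes and callee lists from |Buffer|'s
  // own allocators, the result stays valid after the thread-local trie and
  // its allocators go away.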

  // The mergeInto operation will update the provided FunctionCallTrie by
  // traversing the current trie's roots and merging the data in its nodes
  // into the data in the target's corresponding nodes. If a node doesn't
  // exist in the provided trie, we add a new one in the right position, and
  // inherit the data from the original (current) trie, along with all its
  // callees.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  void mergeInto(FunctionCallTrie &O) const {
    struct NodeAndTarget {
      FunctionCallTrie::Node *OrigNode;
      FunctionCallTrie::Node *TargetNode;
    };
    using Stack = Array<NodeAndTarget>;
    typename Stack::AllocatorType StackAllocator(
        profilerFlags()->stack_allocator_max, 0);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      Node *TargetRoot = nullptr;
      auto R = O.Roots.find_element(
          [&](const Node *Node) { return Node->FId == Root->FId; });
      if (R == nullptr) {
        TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0,
                                           0, Root->FId);
        O.Roots.Append(TargetRoot);
      } else {
        TargetRoot = *R;
      }

      DFSStack.Append(NodeAndTarget{Root, TargetRoot});
      while (!DFSStack.empty()) {
        NodeAndTarget NT = DFSStack.back();
        DCHECK_NE(NT.OrigNode, nullptr);
        DCHECK_NE(NT.TargetNode, nullptr);
        DFSStack.trim(1);
        // TODO: Update the histogram as well when we have it ready.
        NT.TargetNode->CallCount += NT.OrigNode->CallCount;
        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
        for (const auto Callee : NT.OrigNode->Callees) {
          auto TargetCallee = NT.TargetNode->Callees.find_element(
              [&](const FunctionCallTrie::NodeIdPair &C) {
                return C.FId == Callee.FId;
              });
          if (TargetCallee == nullptr) {
            auto NewTargetNode = O.Nodes.AppendEmplace(
                NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId);
            TargetCallee =
                NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
          }
          DFSStack.Append(NodeAndTarget{Callee.NodePtr, TargetCallee->NodePtr});
        }
      }
    }
  }
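
  // A sketch of aggregation across threads, assuming each thread keeps its
  // own FunctionCallTrie and some collector owns |Global| (the names are
  // hypothetical, and the caller must provide the synchronisation):
  //
  //   FunctionCallTrie Global(GlobalAllocators);
  //   for (auto &ThreadTrie : PerThreadTries)
  //     ThreadTrie.mergeInto(Global);
  //
  // Unlike deepCopyInto, mergeInto tolerates a non-empty target: matching
  // nodes accumulate call counts and cumulative local times instead of being
  // duplicated.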
};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H