//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <memory> // For placement new.
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function and
/// the path from the root to the node its stack trace. Each node in the trie
/// will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented using
/// a Array<NodeIdPair> -- each NodeIdPair instance will contain the function
/// ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach as it takes a lot of time to compute the hash
/// every time we need to update a function's call information as we're handling
/// the entry and exit events.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact and
/// quick to update) to enable us to keep the overheads of this implementation
/// to the minimum.
90class FunctionCallTrie {
91public:
92 struct Node;
93
94 // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
95 // standard library types in this header.
96 struct NodeIdPair {
97 Node *NodePtr;
98 int32_t FId;
99
100 // Constructor for inplace-construction.
101 NodeIdPair(Node *N, int32_t F) : NodePtr(N), FId(F) {}
102 };
103
104 using NodeIdPairArray = Array<NodeIdPair>;
105 using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;
106
107 // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
108 // number of times this node actually appeared, the cumulative amount of time
109 // for this particular node including its children call times, and just the
110 // local time spent on this node. Each Node will have the ID of the XRay
111 // instrumented function that it is associated to.
112 struct Node {
113 Node *Parent;
114 NodeIdPairArray Callees;
115 int64_t CallCount;
116 int64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
117 int32_t FId;
118
119 // We add a constructor here to allow us to inplace-construct through
120 // Array<...>'s AppendEmplace.
121 Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT,
122 int32_t F)
123 : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT),
124 FId(F) {}
125
126 // TODO: Include the compact histogram.
127 };
128
129private:
130 struct ShadowStackEntry {
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000131 uint64_t EntryTSC;
132 Node *NodePtr;
133
134 // We add a constructor here to allow us to inplace-construct through
135 // Array<...>'s AppendEmplace.
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000136 ShadowStackEntry(uint64_t T, Node *N) : EntryTSC{T}, NodePtr{N} {}
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000137 };
138
139 using NodeArray = Array<Node>;
140 using RootArray = Array<Node *>;
141 using ShadowStackArray = Array<ShadowStackEntry>;
142
143public:
144 // We collate the allocators we need into a single struct, as a convenience to
145 // allow us to initialize these as a group.
146 struct Allocators {
147 using NodeAllocatorType = NodeArray::AllocatorType;
148 using RootAllocatorType = RootArray::AllocatorType;
149 using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000150
151 NodeAllocatorType *NodeAllocator = nullptr;
152 RootAllocatorType *RootAllocator = nullptr;
153 ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
154 NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
155
156 Allocators() {}
157 Allocators(const Allocators &) = delete;
158 Allocators &operator=(const Allocators &) = delete;
159
160 Allocators(Allocators &&O)
161 : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator),
162 ShadowStackAllocator(O.ShadowStackAllocator),
163 NodeIdPairAllocator(O.NodeIdPairAllocator) {
164 O.NodeAllocator = nullptr;
165 O.RootAllocator = nullptr;
166 O.ShadowStackAllocator = nullptr;
167 O.NodeIdPairAllocator = nullptr;
168 }
169
170 Allocators &operator=(Allocators &&O) {
171 {
172 auto Tmp = O.NodeAllocator;
173 O.NodeAllocator = this->NodeAllocator;
174 this->NodeAllocator = Tmp;
175 }
176 {
177 auto Tmp = O.RootAllocator;
178 O.RootAllocator = this->RootAllocator;
179 this->RootAllocator = Tmp;
180 }
181 {
182 auto Tmp = O.ShadowStackAllocator;
183 O.ShadowStackAllocator = this->ShadowStackAllocator;
184 this->ShadowStackAllocator = Tmp;
185 }
186 {
187 auto Tmp = O.NodeIdPairAllocator;
188 O.NodeIdPairAllocator = this->NodeIdPairAllocator;
189 this->NodeIdPairAllocator = Tmp;
190 }
191 return *this;
192 }
193
194 ~Allocators() {
195 // Note that we cannot use delete on these pointers, as they need to be
196 // returned to the sanitizer_common library's internal memory tracking
197 // system.
198 if (NodeAllocator != nullptr) {
199 NodeAllocator->~NodeAllocatorType();
200 InternalFree(NodeAllocator);
201 }
202 if (RootAllocator != nullptr) {
203 RootAllocator->~RootAllocatorType();
204 InternalFree(RootAllocator);
205 }
206 if (ShadowStackAllocator != nullptr) {
207 ShadowStackAllocator->~ShadowStackAllocatorType();
208 InternalFree(ShadowStackAllocator);
209 }
210 if (NodeIdPairAllocator != nullptr) {
211 NodeIdPairAllocator->~NodeIdPairAllocatorType();
212 InternalFree(NodeIdPairAllocator);
213 }
214 }
215 };
216
217 // TODO: Support configuration of options through the arguments.
218 static Allocators InitAllocators() {
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000219 return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
220 }
221
222 static Allocators InitAllocatorsCustom(uptr Max) {
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000223 Allocators A;
224 auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>(
225 InternalAlloc(sizeof(Allocators::NodeAllocatorType)));
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000226 new (NodeAllocator) Allocators::NodeAllocatorType(Max, 0);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000227 A.NodeAllocator = NodeAllocator;
228
229 auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>(
230 InternalAlloc(sizeof(Allocators::RootAllocatorType)));
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000231 new (RootAllocator) Allocators::RootAllocatorType(Max, 0);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000232 A.RootAllocator = RootAllocator;
233
234 auto ShadowStackAllocator =
235 reinterpret_cast<Allocators::ShadowStackAllocatorType *>(
236 InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType)));
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000237 new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max, 0);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000238 A.ShadowStackAllocator = ShadowStackAllocator;
239
Dean Michael Berrisd1fe5062018-05-31 05:25:47 +0000240 auto NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
241 InternalAlloc(sizeof(NodeIdPairAllocatorType)));
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000242 new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max, 0);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000243 A.NodeIdPairAllocator = NodeIdPairAllocator;
244 return A;
245 }
246
247private:
248 NodeArray Nodes;
249 RootArray Roots;
250 ShadowStackArray ShadowStack;
251 NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
252
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000253public:
254 explicit FunctionCallTrie(const Allocators &A)
255 : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
256 ShadowStack(*A.ShadowStackAllocator),
257 NodeIdPairAllocator(A.NodeIdPairAllocator) {}
258
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000259 void enterFunction(const int32_t FId, uint64_t TSC) {
260 DCHECK_NE(FId, 0);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000261 // This function primarily deals with ensuring that the ShadowStack is
262 // consistent and ready for when an exit event is encountered.
263 if (UNLIKELY(ShadowStack.empty())) {
264 auto NewRoot =
265 Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0, 0, FId);
266 if (UNLIKELY(NewRoot == nullptr))
267 return;
268 Roots.Append(NewRoot);
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000269 ShadowStack.AppendEmplace(TSC, NewRoot);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000270 return;
271 }
272
273 auto &Top = ShadowStack.back();
274 auto TopNode = Top.NodePtr;
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000275 DCHECK_NE(TopNode, nullptr);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000276
277 // If we've seen this callee before, then we just access that node and place
278 // that on the top of the stack.
279 auto Callee = TopNode->Callees.find_element(
280 [FId](const NodeIdPair &NR) { return NR.FId == FId; });
281 if (Callee != nullptr) {
282 CHECK_NE(Callee->NodePtr, nullptr);
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000283 ShadowStack.AppendEmplace(TSC, Callee->NodePtr);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000284 return;
285 }
286
287 // This means we've never seen this stack before, create a new node here.
288 auto NewNode =
289 Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId);
290 if (UNLIKELY(NewNode == nullptr))
291 return;
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000292 DCHECK_NE(NewNode, nullptr);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000293 TopNode->Callees.AppendEmplace(NewNode, FId);
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000294 ShadowStack.AppendEmplace(TSC, NewNode);
295 DCHECK_NE(ShadowStack.back().NodePtr, nullptr);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000296 return;
297 }
298
299 void exitFunction(int32_t FId, uint64_t TSC) {
300 // When we exit a function, we look up the ShadowStack to see whether we've
301 // entered this function before. We do as little processing here as we can,
302 // since most of the hard work would have already been done at function
303 // entry.
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000304 uint64_t CumulativeTreeTime = 0;
305 while (!ShadowStack.empty()) {
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000306 const auto &Top = ShadowStack.back();
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000307 auto TopNode = Top.NodePtr;
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000308 DCHECK_NE(TopNode, nullptr);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000309 auto LocalTime = TSC - Top.EntryTSC;
310 TopNode->CallCount++;
311 TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
312 CumulativeTreeTime += LocalTime;
313 ShadowStack.trim(1);
314
315 // TODO: Update the histogram for the node.
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000316 if (TopNode->FId == FId)
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000317 break;
318 }
319 }
320
321 const RootArray &getRoots() const { return Roots; }
322
323 // The deepCopyInto operation will update the provided FunctionCallTrie by
324 // re-creating the contents of this particular FunctionCallTrie in the other
325 // FunctionCallTrie. It will do this using a Depth First Traversal from the
326 // roots, and while doing so recreating the traversal in the provided
327 // FunctionCallTrie.
328 //
329 // This operation will *not* destroy the state in `O`, and thus may cause some
330 // duplicate entries in `O` if it is not empty.
331 //
332 // This function is *not* thread-safe, and may require external
333 // synchronisation of both "this" and |O|.
334 //
335 // This function must *not* be called with a non-empty FunctionCallTrie |O|.
336 void deepCopyInto(FunctionCallTrie &O) const {
337 DCHECK(O.getRoots().empty());
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000338
339 // We then push the root into a stack, to use as the parent marker for new
340 // nodes we push in as we're traversing depth-first down the call tree.
341 struct NodeAndParent {
342 FunctionCallTrie::Node *Node;
343 FunctionCallTrie::Node *NewNode;
344 };
345 using Stack = Array<NodeAndParent>;
346
347 typename Stack::AllocatorType StackAllocator(
348 profilingFlags()->stack_allocator_max, 0);
349 Stack DFSStack(StackAllocator);
350
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000351 for (const auto Root : getRoots()) {
352 // Add a node in O for this root.
353 auto NewRoot = O.Nodes.AppendEmplace(
354 nullptr, *O.NodeIdPairAllocator, Root->CallCount,
355 Root->CumulativeLocalTime, Root->FId);
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000356
357 // Because we cannot allocate more memory we should bail out right away.
358 if (UNLIKELY(NewRoot == nullptr))
359 return;
360
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000361 O.Roots.Append(NewRoot);
362
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000363 // TODO: Figure out what to do if we fail to allocate any more stack
364 // space. Maybe warn or report once?
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000365 DFSStack.AppendEmplace(Root, NewRoot);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000366 while (!DFSStack.empty()) {
367 NodeAndParent NP = DFSStack.back();
368 DCHECK_NE(NP.Node, nullptr);
369 DCHECK_NE(NP.NewNode, nullptr);
370 DFSStack.trim(1);
371 for (const auto Callee : NP.Node->Callees) {
372 auto NewNode = O.Nodes.AppendEmplace(
373 NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
374 Callee.NodePtr->CumulativeLocalTime, Callee.FId);
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000375 if (UNLIKELY(NewNode == nullptr))
376 return;
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000377 NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000378 DFSStack.AppendEmplace(Callee.NodePtr, NewNode);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000379 }
380 }
381 }
382 }
383
384 // The mergeInto operation will update the provided FunctionCallTrie by
385 // traversing the current trie's roots and updating (i.e. merging) the data in
386 // the nodes with the data in the target's nodes. If the node doesn't exist in
387 // the provided trie, we add a new one in the right position, and inherit the
388 // data from the original (current) trie, along with all its callees.
389 //
390 // This function is *not* thread-safe, and may require external
391 // synchronisation of both "this" and |O|.
392 void mergeInto(FunctionCallTrie &O) const {
393 struct NodeAndTarget {
394 FunctionCallTrie::Node *OrigNode;
395 FunctionCallTrie::Node *TargetNode;
396 };
397 using Stack = Array<NodeAndTarget>;
398 typename Stack::AllocatorType StackAllocator(
Dean Michael Berriscfd7eec2018-06-12 03:29:39 +0000399 profilingFlags()->stack_allocator_max, 0);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000400 Stack DFSStack(StackAllocator);
401
402 for (const auto Root : getRoots()) {
403 Node *TargetRoot = nullptr;
404 auto R = O.Roots.find_element(
405 [&](const Node *Node) { return Node->FId == Root->FId; });
406 if (R == nullptr) {
407 TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0,
408 0, Root->FId);
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000409 if (UNLIKELY(TargetRoot == nullptr))
410 return;
411
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000412 O.Roots.Append(TargetRoot);
413 } else {
414 TargetRoot = *R;
415 }
416
417 DFSStack.Append(NodeAndTarget{Root, TargetRoot});
418 while (!DFSStack.empty()) {
419 NodeAndTarget NT = DFSStack.back();
420 DCHECK_NE(NT.OrigNode, nullptr);
421 DCHECK_NE(NT.TargetNode, nullptr);
422 DFSStack.trim(1);
423 // TODO: Update the histogram as well when we have it ready.
424 NT.TargetNode->CallCount += NT.OrigNode->CallCount;
425 NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
426 for (const auto Callee : NT.OrigNode->Callees) {
427 auto TargetCallee = NT.TargetNode->Callees.find_element(
428 [&](const FunctionCallTrie::NodeIdPair &C) {
429 return C.FId == Callee.FId;
430 });
431 if (TargetCallee == nullptr) {
432 auto NewTargetNode = O.Nodes.AppendEmplace(
433 NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId);
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000434
435 if (UNLIKELY(NewTargetNode == nullptr))
436 return;
437
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000438 TargetCallee =
439 NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
440 }
Dean Michael Berris0dd4f9f2018-07-10 08:25:44 +0000441 DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000442 }
443 }
444 }
445 }
446};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H