blob: af262f847dfd61f7d9a2238d3f3a48722748cefb [file] [log] [blame]
//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_profiler_flags.h"
#include "xray_segmented_array.h"
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function and
/// the path from the root to the node its stack trace. Each node in the trie
/// will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented using
/// a Array<NodeIdPair> -- each NodeIdPair instance will contain the function
/// ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach as it takes a lot of time to compute the hash
/// every time we need to update a function's call information as we're handling
/// the entry and exit events.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact and
/// quick to update) to enable us to keep the overheads of this implementation
/// to the minimum.
89class FunctionCallTrie {
90public:
91 struct Node;
92
93 // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
94 // standard library types in this header.
95 struct NodeIdPair {
96 Node *NodePtr;
97 int32_t FId;
98
99 // Constructor for inplace-construction.
100 NodeIdPair(Node *N, int32_t F) : NodePtr(N), FId(F) {}
101 };
102
103 using NodeIdPairArray = Array<NodeIdPair>;
104 using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;
105
106 // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
107 // number of times this node actually appeared, the cumulative amount of time
108 // for this particular node including its children call times, and just the
109 // local time spent on this node. Each Node will have the ID of the XRay
110 // instrumented function that it is associated to.
111 struct Node {
112 Node *Parent;
113 NodeIdPairArray Callees;
114 int64_t CallCount;
115 int64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
116 int32_t FId;
117
118 // We add a constructor here to allow us to inplace-construct through
119 // Array<...>'s AppendEmplace.
120 Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT,
121 int32_t F)
122 : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT),
123 FId(F) {}
124
125 // TODO: Include the compact histogram.
126 };
127
128private:
129 struct ShadowStackEntry {
130 int32_t FId; // We're copying the function ID into the stack to avoid having
131 // to reach into the node just to get the function ID.
132 uint64_t EntryTSC;
133 Node *NodePtr;
134
135 // We add a constructor here to allow us to inplace-construct through
136 // Array<...>'s AppendEmplace.
137 ShadowStackEntry(int32_t F, uint64_t T, Node *N)
138 : FId(F), EntryTSC(T), NodePtr(N) {}
139 };
140
141 using NodeArray = Array<Node>;
142 using RootArray = Array<Node *>;
143 using ShadowStackArray = Array<ShadowStackEntry>;
144
145public:
146 // We collate the allocators we need into a single struct, as a convenience to
147 // allow us to initialize these as a group.
148 struct Allocators {
149 using NodeAllocatorType = NodeArray::AllocatorType;
150 using RootAllocatorType = RootArray::AllocatorType;
151 using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;
152 using NodeIdPairAllocatorType = NodeIdPairAllocatorType;
153
154 NodeAllocatorType *NodeAllocator = nullptr;
155 RootAllocatorType *RootAllocator = nullptr;
156 ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
157 NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
158
159 Allocators() {}
160 Allocators(const Allocators &) = delete;
161 Allocators &operator=(const Allocators &) = delete;
162
163 Allocators(Allocators &&O)
164 : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator),
165 ShadowStackAllocator(O.ShadowStackAllocator),
166 NodeIdPairAllocator(O.NodeIdPairAllocator) {
167 O.NodeAllocator = nullptr;
168 O.RootAllocator = nullptr;
169 O.ShadowStackAllocator = nullptr;
170 O.NodeIdPairAllocator = nullptr;
171 }
172
173 Allocators &operator=(Allocators &&O) {
174 {
175 auto Tmp = O.NodeAllocator;
176 O.NodeAllocator = this->NodeAllocator;
177 this->NodeAllocator = Tmp;
178 }
179 {
180 auto Tmp = O.RootAllocator;
181 O.RootAllocator = this->RootAllocator;
182 this->RootAllocator = Tmp;
183 }
184 {
185 auto Tmp = O.ShadowStackAllocator;
186 O.ShadowStackAllocator = this->ShadowStackAllocator;
187 this->ShadowStackAllocator = Tmp;
188 }
189 {
190 auto Tmp = O.NodeIdPairAllocator;
191 O.NodeIdPairAllocator = this->NodeIdPairAllocator;
192 this->NodeIdPairAllocator = Tmp;
193 }
194 return *this;
195 }
196
197 ~Allocators() {
198 // Note that we cannot use delete on these pointers, as they need to be
199 // returned to the sanitizer_common library's internal memory tracking
200 // system.
201 if (NodeAllocator != nullptr) {
202 NodeAllocator->~NodeAllocatorType();
203 InternalFree(NodeAllocator);
204 }
205 if (RootAllocator != nullptr) {
206 RootAllocator->~RootAllocatorType();
207 InternalFree(RootAllocator);
208 }
209 if (ShadowStackAllocator != nullptr) {
210 ShadowStackAllocator->~ShadowStackAllocatorType();
211 InternalFree(ShadowStackAllocator);
212 }
213 if (NodeIdPairAllocator != nullptr) {
214 NodeIdPairAllocator->~NodeIdPairAllocatorType();
215 InternalFree(NodeIdPairAllocator);
216 }
217 }
218 };
219
220 // TODO: Support configuration of options through the arguments.
221 static Allocators InitAllocators() {
222 Allocators A;
223 auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>(
224 InternalAlloc(sizeof(Allocators::NodeAllocatorType)));
225 new (NodeAllocator) Allocators::NodeAllocatorType(
226 profilerFlags()->per_thread_allocator_max, 0);
227 A.NodeAllocator = NodeAllocator;
228
229 auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>(
230 InternalAlloc(sizeof(Allocators::RootAllocatorType)));
231 new (RootAllocator) Allocators::RootAllocatorType(
232 profilerFlags()->per_thread_allocator_max, 0);
233 A.RootAllocator = RootAllocator;
234
235 auto ShadowStackAllocator =
236 reinterpret_cast<Allocators::ShadowStackAllocatorType *>(
237 InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType)));
238 new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(
239 profilerFlags()->per_thread_allocator_max, 0);
240 A.ShadowStackAllocator = ShadowStackAllocator;
241
242 auto NodeIdPairAllocator =
243 reinterpret_cast<Allocators::NodeIdPairAllocatorType *>(
244 InternalAlloc(sizeof(Allocators::NodeIdPairAllocatorType)));
245 new (NodeIdPairAllocator) Allocators::NodeIdPairAllocatorType(
246 profilerFlags()->per_thread_allocator_max, 0);
247 A.NodeIdPairAllocator = NodeIdPairAllocator;
248 return A;
249 }
250
251private:
252 NodeArray Nodes;
253 RootArray Roots;
254 ShadowStackArray ShadowStack;
255 NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
256
257 const Allocators &GetGlobalAllocators() {
258 static const Allocators A = [] { return InitAllocators(); }();
259 return A;
260 }
261
262public:
263 explicit FunctionCallTrie(const Allocators &A)
264 : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
265 ShadowStack(*A.ShadowStackAllocator),
266 NodeIdPairAllocator(A.NodeIdPairAllocator) {}
267
268 FunctionCallTrie() : FunctionCallTrie(GetGlobalAllocators()) {}
269
270 void enterFunction(int32_t FId, uint64_t TSC) {
271 // This function primarily deals with ensuring that the ShadowStack is
272 // consistent and ready for when an exit event is encountered.
273 if (UNLIKELY(ShadowStack.empty())) {
274 auto NewRoot =
275 Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0, 0, FId);
276 if (UNLIKELY(NewRoot == nullptr))
277 return;
278 Roots.Append(NewRoot);
279 ShadowStack.AppendEmplace(FId, TSC, NewRoot);
280 return;
281 }
282
283 auto &Top = ShadowStack.back();
284 auto TopNode = Top.NodePtr;
285
286 // If we've seen this callee before, then we just access that node and place
287 // that on the top of the stack.
288 auto Callee = TopNode->Callees.find_element(
289 [FId](const NodeIdPair &NR) { return NR.FId == FId; });
290 if (Callee != nullptr) {
291 CHECK_NE(Callee->NodePtr, nullptr);
292 ShadowStack.AppendEmplace(FId, TSC, Callee->NodePtr);
293 return;
294 }
295
296 // This means we've never seen this stack before, create a new node here.
297 auto NewNode =
298 Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId);
299 if (UNLIKELY(NewNode == nullptr))
300 return;
301 TopNode->Callees.AppendEmplace(NewNode, FId);
302 ShadowStack.AppendEmplace(FId, TSC, NewNode);
303 return;
304 }
305
306 void exitFunction(int32_t FId, uint64_t TSC) {
307 // When we exit a function, we look up the ShadowStack to see whether we've
308 // entered this function before. We do as little processing here as we can,
309 // since most of the hard work would have already been done at function
310 // entry.
311 if (UNLIKELY(ShadowStack.empty()))
312 return;
313
314 uint64_t CumulativeTreeTime = 0;
315 while (!ShadowStack.empty()) {
316 auto &Top = ShadowStack.back();
317 auto TopNode = Top.NodePtr;
318 auto TopFId = TopNode->FId;
319 auto LocalTime = TSC - Top.EntryTSC;
320 TopNode->CallCount++;
321 TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
322 CumulativeTreeTime += LocalTime;
323 ShadowStack.trim(1);
324
325 // TODO: Update the histogram for the node.
326 if (TopFId == FId)
327 break;
328 }
329 }
330
331 const RootArray &getRoots() const { return Roots; }
332
333 // The deepCopyInto operation will update the provided FunctionCallTrie by
334 // re-creating the contents of this particular FunctionCallTrie in the other
335 // FunctionCallTrie. It will do this using a Depth First Traversal from the
336 // roots, and while doing so recreating the traversal in the provided
337 // FunctionCallTrie.
338 //
339 // This operation will *not* destroy the state in `O`, and thus may cause some
340 // duplicate entries in `O` if it is not empty.
341 //
342 // This function is *not* thread-safe, and may require external
343 // synchronisation of both "this" and |O|.
344 //
345 // This function must *not* be called with a non-empty FunctionCallTrie |O|.
346 void deepCopyInto(FunctionCallTrie &O) const {
347 DCHECK(O.getRoots().empty());
348 for (const auto Root : getRoots()) {
349 // Add a node in O for this root.
350 auto NewRoot = O.Nodes.AppendEmplace(
351 nullptr, *O.NodeIdPairAllocator, Root->CallCount,
352 Root->CumulativeLocalTime, Root->FId);
353 O.Roots.Append(NewRoot);
354
355 // We then push the root into a stack, to use as the parent marker for new
356 // nodes we push in as we're traversing depth-first down the call tree.
357 struct NodeAndParent {
358 FunctionCallTrie::Node *Node;
359 FunctionCallTrie::Node *NewNode;
360 };
361 using Stack = Array<NodeAndParent>;
362
363 typename Stack::AllocatorType StackAllocator(
364 profilerFlags()->stack_allocator_max, 0);
365 Stack DFSStack(StackAllocator);
366
367 // TODO: Figure out what to do if we fail to allocate any more stack
368 // space. Maybe warn or report once?
369 DFSStack.Append(NodeAndParent{Root, NewRoot});
370 while (!DFSStack.empty()) {
371 NodeAndParent NP = DFSStack.back();
372 DCHECK_NE(NP.Node, nullptr);
373 DCHECK_NE(NP.NewNode, nullptr);
374 DFSStack.trim(1);
375 for (const auto Callee : NP.Node->Callees) {
376 auto NewNode = O.Nodes.AppendEmplace(
377 NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
378 Callee.NodePtr->CumulativeLocalTime, Callee.FId);
379 DCHECK_NE(NewNode, nullptr);
380 NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
381 DFSStack.Append(NodeAndParent{Callee.NodePtr, NewNode});
382 }
383 }
384 }
385 }
386
387 // The mergeInto operation will update the provided FunctionCallTrie by
388 // traversing the current trie's roots and updating (i.e. merging) the data in
389 // the nodes with the data in the target's nodes. If the node doesn't exist in
390 // the provided trie, we add a new one in the right position, and inherit the
391 // data from the original (current) trie, along with all its callees.
392 //
393 // This function is *not* thread-safe, and may require external
394 // synchronisation of both "this" and |O|.
395 void mergeInto(FunctionCallTrie &O) const {
396 struct NodeAndTarget {
397 FunctionCallTrie::Node *OrigNode;
398 FunctionCallTrie::Node *TargetNode;
399 };
400 using Stack = Array<NodeAndTarget>;
401 typename Stack::AllocatorType StackAllocator(
402 profilerFlags()->stack_allocator_max, 0);
403 Stack DFSStack(StackAllocator);
404
405 for (const auto Root : getRoots()) {
406 Node *TargetRoot = nullptr;
407 auto R = O.Roots.find_element(
408 [&](const Node *Node) { return Node->FId == Root->FId; });
409 if (R == nullptr) {
410 TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0,
411 0, Root->FId);
412 O.Roots.Append(TargetRoot);
413 } else {
414 TargetRoot = *R;
415 }
416
417 DFSStack.Append(NodeAndTarget{Root, TargetRoot});
418 while (!DFSStack.empty()) {
419 NodeAndTarget NT = DFSStack.back();
420 DCHECK_NE(NT.OrigNode, nullptr);
421 DCHECK_NE(NT.TargetNode, nullptr);
422 DFSStack.trim(1);
423 // TODO: Update the histogram as well when we have it ready.
424 NT.TargetNode->CallCount += NT.OrigNode->CallCount;
425 NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
426 for (const auto Callee : NT.OrigNode->Callees) {
427 auto TargetCallee = NT.TargetNode->Callees.find_element(
428 [&](const FunctionCallTrie::NodeIdPair &C) {
429 return C.FId == Callee.FId;
430 });
431 if (TargetCallee == nullptr) {
432 auto NewTargetNode = O.Nodes.AppendEmplace(
433 NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId);
434 TargetCallee =
435 NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
436 }
437 DFSStack.Append(NodeAndTarget{Callee.NodePtr, TargetCallee->NodePtr});
438 }
439 }
440 }
441 }
442};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H