//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_profiler_flags.h"
#include "xray_segmented_array.h"
#include <new> // For placement new.
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function
/// and the path from the root to the node represents its stack trace. Each
/// node in the trie will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented
/// using an Array<NodeIdPair> -- each NodeIdPair instance will contain the
/// function ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f  -> [ g, h, j ] [...] [1] [...]
///   g  -> [ ... ] [...] [1] [...]
///   h  -> [ ... ] [...] [1] [...]
///   j  -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f  -> [ g, h, j ] [...] [1] [...]
///   g  -> [ j' ] [...] [1] [...]
///   h  -> [ ... ] [...] [1] [...]
///   j  -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach as it takes a lot of time to compute the hash
/// every time we need to update a function's call information as we're
/// handling the entry and exit events.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact and
/// quick to update) to enable us to keep the overheads of this implementation
/// to the minimum.
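///
/// A minimal usage sketch (TSC values and function IDs are illustrative; in
/// the XRay runtime these come from the instrumentation handlers):
///
///   auto A = FunctionCallTrie::InitAllocators();
///   FunctionCallTrie T(A);
///   T.enterFunction(1, 100);  // enter function 1 at TSC 100
///   T.enterFunction(2, 110);  //   function 1 calls function 2
///   T.exitFunction(2, 150);   //   function 2's node is credited 40 ticks
///   T.exitFunction(1, 200);   // function 1's node is credited 100 ticks
///                             // (this includes the 40 spent in function 2)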
class FunctionCallTrie {
public:
  struct Node;

  // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
  // standard library types in this header.
  struct NodeIdPair {
    Node *NodePtr;
    int32_t FId;

    // Constructor for in-place construction.
    NodeIdPair(Node *N, int32_t F) : NodePtr(N), FId(F) {}
  };

  using NodeIdPairArray = Array<NodeIdPair>;
  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;

  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
  // number of times this node actually appeared, and the cumulative amount of
  // time spent in this particular node, including its children's call times.
  // Each Node will have the ID of the XRay instrumented function that it is
  // associated with.
  struct Node {
    Node *Parent;
    NodeIdPairArray Callees;
    int64_t CallCount;
    int64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
    int32_t FId;

    // We add a constructor here to allow us to construct these in place
    // through Array<...>'s AppendEmplace.
    Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT,
         int32_t F)
        : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT),
          FId(F) {}

    // TODO: Include the compact histogram.
  };

private:
  struct ShadowStackEntry {
    // We copy the function ID into the stack entry to avoid having to reach
    // into the node just to get the function ID.
    int32_t FId;
    uint64_t EntryTSC;
    Node *NodePtr;

    // We add a constructor here to allow us to construct these in place
    // through Array<...>'s AppendEmplace.
    ShadowStackEntry(int32_t F, uint64_t T, Node *N)
        : FId(F), EntryTSC(T), NodePtr(N) {}
  };

  using NodeArray = Array<Node>;
  using RootArray = Array<Node *>;
  using ShadowStackArray = Array<ShadowStackEntry>;

public:
  // We collate the allocators we need into a single struct, as a convenience
  // to allow us to initialize these as a group.
  struct Allocators {
    using NodeAllocatorType = NodeArray::AllocatorType;
    using RootAllocatorType = RootArray::AllocatorType;
    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;
    using NodeIdPairAllocatorType = FunctionCallTrie::NodeIdPairAllocatorType;

    NodeAllocatorType *NodeAllocator = nullptr;
    RootAllocatorType *RootAllocator = nullptr;
    ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

    Allocators() {}
    Allocators(const Allocators &) = delete;
    Allocators &operator=(const Allocators &) = delete;

    Allocators(Allocators &&O)
        : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator),
          ShadowStackAllocator(O.ShadowStackAllocator),
          NodeIdPairAllocator(O.NodeIdPairAllocator) {
      O.NodeAllocator = nullptr;
      O.RootAllocator = nullptr;
      O.ShadowStackAllocator = nullptr;
      O.NodeIdPairAllocator = nullptr;
    }

    Allocators &operator=(Allocators &&O) {
      {
        auto Tmp = O.NodeAllocator;
        O.NodeAllocator = this->NodeAllocator;
        this->NodeAllocator = Tmp;
      }
      {
        auto Tmp = O.RootAllocator;
        O.RootAllocator = this->RootAllocator;
        this->RootAllocator = Tmp;
      }
      {
        auto Tmp = O.ShadowStackAllocator;
        O.ShadowStackAllocator = this->ShadowStackAllocator;
        this->ShadowStackAllocator = Tmp;
      }
      {
        auto Tmp = O.NodeIdPairAllocator;
        O.NodeIdPairAllocator = this->NodeIdPairAllocator;
        this->NodeIdPairAllocator = Tmp;
      }
      return *this;
    }

    ~Allocators() {
      // Note that we cannot use delete on these pointers, as they need to be
      // returned to the sanitizer_common library's internal memory tracking
      // system.
      if (NodeAllocator != nullptr) {
        NodeAllocator->~NodeAllocatorType();
        InternalFree(NodeAllocator);
      }
      if (RootAllocator != nullptr) {
        RootAllocator->~RootAllocatorType();
        InternalFree(RootAllocator);
      }
      if (ShadowStackAllocator != nullptr) {
        ShadowStackAllocator->~ShadowStackAllocatorType();
        InternalFree(ShadowStackAllocator);
      }
      if (NodeIdPairAllocator != nullptr) {
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
        InternalFree(NodeIdPairAllocator);
      }
    }
  };

  // TODO: Support configuration of options through the arguments.
  static Allocators InitAllocators() {
    Allocators A;
    auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>(
        InternalAlloc(sizeof(Allocators::NodeAllocatorType)));
    new (NodeAllocator) Allocators::NodeAllocatorType(
        profilerFlags()->per_thread_allocator_max, 0);
    A.NodeAllocator = NodeAllocator;

    auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>(
        InternalAlloc(sizeof(Allocators::RootAllocatorType)));
    new (RootAllocator) Allocators::RootAllocatorType(
        profilerFlags()->per_thread_allocator_max, 0);
    A.RootAllocator = RootAllocator;

    auto ShadowStackAllocator =
        reinterpret_cast<Allocators::ShadowStackAllocatorType *>(
            InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType)));
    new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(
        profilerFlags()->per_thread_allocator_max, 0);
    A.ShadowStackAllocator = ShadowStackAllocator;

    auto NodeIdPairAllocator =
        reinterpret_cast<Allocators::NodeIdPairAllocatorType *>(
            InternalAlloc(sizeof(Allocators::NodeIdPairAllocatorType)));
    new (NodeIdPairAllocator) Allocators::NodeIdPairAllocatorType(
        profilerFlags()->per_thread_allocator_max, 0);
    A.NodeIdPairAllocator = NodeIdPairAllocator;
    return A;
  }

private:
  NodeArray Nodes;
  RootArray Roots;
  ShadowStackArray ShadowStack;
  NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

  const Allocators &GetGlobalAllocators() {
    static const Allocators A = [] { return InitAllocators(); }();
    return A;
  }

public:
  explicit FunctionCallTrie(const Allocators &A)
      : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
        ShadowStack(*A.ShadowStackAllocator),
        NodeIdPairAllocator(A.NodeIdPairAllocator) {}

  FunctionCallTrie() : FunctionCallTrie(GetGlobalAllocators()) {}

  void enterFunction(int32_t FId, uint64_t TSC) {
    // This function primarily deals with ensuring that the ShadowStack is
    // consistent and ready for when an exit event is encountered.
    if (UNLIKELY(ShadowStack.empty())) {
      auto NewRoot =
          Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0, 0, FId);
      if (UNLIKELY(NewRoot == nullptr))
        return;
      Roots.Append(NewRoot);
      ShadowStack.AppendEmplace(FId, TSC, NewRoot);
      return;
    }

    auto &Top = ShadowStack.back();
    auto TopNode = Top.NodePtr;

    // If we've seen this callee before, then we just access that node and
    // place it on top of the stack.
    auto Callee = TopNode->Callees.find_element(
        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
    if (Callee != nullptr) {
      CHECK_NE(Callee->NodePtr, nullptr);
      ShadowStack.AppendEmplace(FId, TSC, Callee->NodePtr);
      return;
    }

    // This means we've never seen this stack before; create a new node here.
    auto NewNode =
        Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId);
    if (UNLIKELY(NewNode == nullptr))
      return;
    TopNode->Callees.AppendEmplace(NewNode, FId);
    ShadowStack.AppendEmplace(FId, TSC, NewNode);
  }

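  // A worked illustration of the unwinding arithmetic below (TSC values are
  // assumed for the example): after enterFunction(1, 100) and
  // enterFunction(2, 110), a single exitFunction(1, 200) unwinds both frames.
  // Function 2's node is popped first and credited 200 - 110 = 90 ticks;
  // CumulativeTreeTime becomes 90, so when function 1's node is popped it is
  // credited (200 - 100) - 90 = 10 ticks, which avoids double-counting the
  // time already attributed to function 2 within this one unwind.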
  void exitFunction(int32_t FId, uint64_t TSC) {
    // When we exit a function, we look up the ShadowStack to see whether we've
    // entered this function before. We do as little processing here as we can,
    // since most of the hard work would have already been done at function
    // entry.
    if (UNLIKELY(ShadowStack.empty()))
      return;

    uint64_t CumulativeTreeTime = 0;
    while (!ShadowStack.empty()) {
      auto &Top = ShadowStack.back();
      auto TopNode = Top.NodePtr;
      auto TopFId = TopNode->FId;
      auto LocalTime = TSC - Top.EntryTSC;
      TopNode->CallCount++;
      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
      CumulativeTreeTime += LocalTime;
      ShadowStack.trim(1);

      // TODO: Update the histogram for the node.
      if (TopFId == FId)
        break;
    }
  }

  const RootArray &getRoots() const { return Roots; }

  // The deepCopyInto operation will update the provided FunctionCallTrie by
  // re-creating the contents of this particular FunctionCallTrie in the other
  // FunctionCallTrie. It does this with a depth-first traversal from the
  // roots, re-creating each visited node in the provided FunctionCallTrie.
  //
  // This operation does *not* clear the state of `O`, so copying into a
  // non-empty trie would leave duplicate entries behind; for this reason,
  // this function must only be called with an empty FunctionCallTrie `O`.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and `O`.
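  //
  // A minimal sketch (identifiers are illustrative): snapshotting a live trie
  // into a freshly-initialised, empty one:
  //
  //   FunctionCallTrie Snapshot(SnapshotAllocators);
  //   LiveTrie.deepCopyInto(Snapshot);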
  void deepCopyInto(FunctionCallTrie &O) const {
    DCHECK(O.getRoots().empty());
    for (const auto Root : getRoots()) {
      // Add a node in O for this root.
      auto NewRoot = O.Nodes.AppendEmplace(
          nullptr, *O.NodeIdPairAllocator, Root->CallCount,
          Root->CumulativeLocalTime, Root->FId);
      O.Roots.Append(NewRoot);

      // We then push the root into a stack, to use as the parent marker for
      // new nodes we push in as we're traversing depth-first down the call
      // tree.
      struct NodeAndParent {
        FunctionCallTrie::Node *Node;
        FunctionCallTrie::Node *NewNode;
      };
      using Stack = Array<NodeAndParent>;

      typename Stack::AllocatorType StackAllocator(
          profilerFlags()->stack_allocator_max, 0);
      Stack DFSStack(StackAllocator);

      // TODO: Figure out what to do if we fail to allocate any more stack
      // space. Maybe warn or report once?
      DFSStack.Append(NodeAndParent{Root, NewRoot});
      while (!DFSStack.empty()) {
        NodeAndParent NP = DFSStack.back();
        DCHECK_NE(NP.Node, nullptr);
        DCHECK_NE(NP.NewNode, nullptr);
        DFSStack.trim(1);
        for (const auto Callee : NP.Node->Callees) {
          auto NewNode = O.Nodes.AppendEmplace(
              NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
              Callee.NodePtr->CumulativeLocalTime, Callee.FId);
          DCHECK_NE(NewNode, nullptr);
          NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
          DFSStack.Append(NodeAndParent{Callee.NodePtr, NewNode});
        }
      }
    }
  }

  // The mergeInto operation will update the provided FunctionCallTrie by
  // traversing the current trie's roots and merging this trie's data into the
  // corresponding nodes in the target. If a node doesn't exist in the provided
  // trie, we add a new one in the right position, and inherit the data from
  // the original (current) trie, along with all its callees.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and `O`.
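  //
  // For example (identifiers are illustrative): if per-thread tries T1 and T2
  // both recorded the path f -> g, then after
  //
  //   T1.mergeInto(G);
  //   T2.mergeInto(G);
  //
  // the initially empty trie G contains a single f -> g path whose CallCount
  // and CumulativeLocalTime fields are the sums of the corresponding values
  // in T1 and T2.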
  void mergeInto(FunctionCallTrie &O) const {
    struct NodeAndTarget {
      FunctionCallTrie::Node *OrigNode;
      FunctionCallTrie::Node *TargetNode;
    };
    using Stack = Array<NodeAndTarget>;
    typename Stack::AllocatorType StackAllocator(
        profilerFlags()->stack_allocator_max, 0);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      Node *TargetRoot = nullptr;
      auto R = O.Roots.find_element(
          [&](const Node *Node) { return Node->FId == Root->FId; });
      if (R == nullptr) {
        TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0,
                                           0, Root->FId);
        O.Roots.Append(TargetRoot);
      } else {
        TargetRoot = *R;
      }

      DFSStack.Append(NodeAndTarget{Root, TargetRoot});
      while (!DFSStack.empty()) {
        NodeAndTarget NT = DFSStack.back();
        DCHECK_NE(NT.OrigNode, nullptr);
        DCHECK_NE(NT.TargetNode, nullptr);
        DFSStack.trim(1);
        // TODO: Update the histogram as well when we have it ready.
        NT.TargetNode->CallCount += NT.OrigNode->CallCount;
        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
        for (const auto Callee : NT.OrigNode->Callees) {
          auto TargetCallee = NT.TargetNode->Callees.find_element(
              [&](const FunctionCallTrie::NodeIdPair &C) {
                return C.FId == Callee.FId;
              });
          if (TargetCallee == nullptr) {
            auto NewTargetNode = O.Nodes.AppendEmplace(
                NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId);
            TargetCallee =
                NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
          }
          DFSStack.Append(NodeAndTarget{Callee.NodePtr, TargetCallee->NodePtr});
        }
      }
    }
  }
};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H