blob: 95f760f788a85a5a0c45864934ad62003891a5cd [file] [log] [blame]
//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
15#ifndef XRAY_FUNCTION_CALL_TRIE_H
16#define XRAY_FUNCTION_CALL_TRIE_H
17
#include "xray_profiler_flags.h"
#include "xray_segmented_array.h"
#include <memory>
#include <new> // For placement new.
#include <utility>
Dean Michael Berris980d93d2018-05-15 00:42:36 +000022
23namespace __xray {
24
/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function and
/// the path from the root to the node its stack trace. Each node in the trie
/// will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented using
/// a Array<NodeIdPair> -- each NodeIdPair instance will contain the function
/// ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach as it takes a lot of time to compute the hash
/// every time we need to update a function's call information as we're handling
/// the entry and exit events.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact and
/// quick to update) to enable us to keep the overheads of this implementation
/// to the minimum.
90class FunctionCallTrie {
91public:
92 struct Node;
93
94 // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
95 // standard library types in this header.
96 struct NodeIdPair {
97 Node *NodePtr;
98 int32_t FId;
99
100 // Constructor for inplace-construction.
101 NodeIdPair(Node *N, int32_t F) : NodePtr(N), FId(F) {}
102 };
103
104 using NodeIdPairArray = Array<NodeIdPair>;
105 using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;
106
107 // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
108 // number of times this node actually appeared, the cumulative amount of time
109 // for this particular node including its children call times, and just the
110 // local time spent on this node. Each Node will have the ID of the XRay
111 // instrumented function that it is associated to.
112 struct Node {
113 Node *Parent;
114 NodeIdPairArray Callees;
115 int64_t CallCount;
116 int64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
117 int32_t FId;
118
119 // We add a constructor here to allow us to inplace-construct through
120 // Array<...>'s AppendEmplace.
121 Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT,
122 int32_t F)
123 : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT),
124 FId(F) {}
125
126 // TODO: Include the compact histogram.
127 };
128
129private:
130 struct ShadowStackEntry {
131 int32_t FId; // We're copying the function ID into the stack to avoid having
132 // to reach into the node just to get the function ID.
133 uint64_t EntryTSC;
134 Node *NodePtr;
135
136 // We add a constructor here to allow us to inplace-construct through
137 // Array<...>'s AppendEmplace.
138 ShadowStackEntry(int32_t F, uint64_t T, Node *N)
139 : FId(F), EntryTSC(T), NodePtr(N) {}
140 };
141
142 using NodeArray = Array<Node>;
143 using RootArray = Array<Node *>;
144 using ShadowStackArray = Array<ShadowStackEntry>;
145
146public:
147 // We collate the allocators we need into a single struct, as a convenience to
148 // allow us to initialize these as a group.
149 struct Allocators {
150 using NodeAllocatorType = NodeArray::AllocatorType;
151 using RootAllocatorType = RootArray::AllocatorType;
152 using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000153
154 NodeAllocatorType *NodeAllocator = nullptr;
155 RootAllocatorType *RootAllocator = nullptr;
156 ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
157 NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
158
159 Allocators() {}
160 Allocators(const Allocators &) = delete;
161 Allocators &operator=(const Allocators &) = delete;
162
163 Allocators(Allocators &&O)
164 : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator),
165 ShadowStackAllocator(O.ShadowStackAllocator),
166 NodeIdPairAllocator(O.NodeIdPairAllocator) {
167 O.NodeAllocator = nullptr;
168 O.RootAllocator = nullptr;
169 O.ShadowStackAllocator = nullptr;
170 O.NodeIdPairAllocator = nullptr;
171 }
172
173 Allocators &operator=(Allocators &&O) {
174 {
175 auto Tmp = O.NodeAllocator;
176 O.NodeAllocator = this->NodeAllocator;
177 this->NodeAllocator = Tmp;
178 }
179 {
180 auto Tmp = O.RootAllocator;
181 O.RootAllocator = this->RootAllocator;
182 this->RootAllocator = Tmp;
183 }
184 {
185 auto Tmp = O.ShadowStackAllocator;
186 O.ShadowStackAllocator = this->ShadowStackAllocator;
187 this->ShadowStackAllocator = Tmp;
188 }
189 {
190 auto Tmp = O.NodeIdPairAllocator;
191 O.NodeIdPairAllocator = this->NodeIdPairAllocator;
192 this->NodeIdPairAllocator = Tmp;
193 }
194 return *this;
195 }
196
197 ~Allocators() {
198 // Note that we cannot use delete on these pointers, as they need to be
199 // returned to the sanitizer_common library's internal memory tracking
200 // system.
201 if (NodeAllocator != nullptr) {
202 NodeAllocator->~NodeAllocatorType();
203 InternalFree(NodeAllocator);
204 }
205 if (RootAllocator != nullptr) {
206 RootAllocator->~RootAllocatorType();
207 InternalFree(RootAllocator);
208 }
209 if (ShadowStackAllocator != nullptr) {
210 ShadowStackAllocator->~ShadowStackAllocatorType();
211 InternalFree(ShadowStackAllocator);
212 }
213 if (NodeIdPairAllocator != nullptr) {
214 NodeIdPairAllocator->~NodeIdPairAllocatorType();
215 InternalFree(NodeIdPairAllocator);
216 }
217 }
218 };
219
220 // TODO: Support configuration of options through the arguments.
221 static Allocators InitAllocators() {
222 Allocators A;
223 auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>(
224 InternalAlloc(sizeof(Allocators::NodeAllocatorType)));
225 new (NodeAllocator) Allocators::NodeAllocatorType(
226 profilerFlags()->per_thread_allocator_max, 0);
227 A.NodeAllocator = NodeAllocator;
228
229 auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>(
230 InternalAlloc(sizeof(Allocators::RootAllocatorType)));
231 new (RootAllocator) Allocators::RootAllocatorType(
232 profilerFlags()->per_thread_allocator_max, 0);
233 A.RootAllocator = RootAllocator;
234
235 auto ShadowStackAllocator =
236 reinterpret_cast<Allocators::ShadowStackAllocatorType *>(
237 InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType)));
238 new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(
239 profilerFlags()->per_thread_allocator_max, 0);
240 A.ShadowStackAllocator = ShadowStackAllocator;
241
Dean Michael Berrisd1fe5062018-05-31 05:25:47 +0000242 auto NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
243 InternalAlloc(sizeof(NodeIdPairAllocatorType)));
244 new (NodeIdPairAllocator)
245 NodeIdPairAllocatorType(profilerFlags()->per_thread_allocator_max, 0);
Dean Michael Berris980d93d2018-05-15 00:42:36 +0000246 A.NodeIdPairAllocator = NodeIdPairAllocator;
247 return A;
248 }
249
250private:
251 NodeArray Nodes;
252 RootArray Roots;
253 ShadowStackArray ShadowStack;
254 NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
255
256 const Allocators &GetGlobalAllocators() {
257 static const Allocators A = [] { return InitAllocators(); }();
258 return A;
259 }
260
261public:
262 explicit FunctionCallTrie(const Allocators &A)
263 : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
264 ShadowStack(*A.ShadowStackAllocator),
265 NodeIdPairAllocator(A.NodeIdPairAllocator) {}
266
267 FunctionCallTrie() : FunctionCallTrie(GetGlobalAllocators()) {}
268
269 void enterFunction(int32_t FId, uint64_t TSC) {
270 // This function primarily deals with ensuring that the ShadowStack is
271 // consistent and ready for when an exit event is encountered.
272 if (UNLIKELY(ShadowStack.empty())) {
273 auto NewRoot =
274 Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0, 0, FId);
275 if (UNLIKELY(NewRoot == nullptr))
276 return;
277 Roots.Append(NewRoot);
278 ShadowStack.AppendEmplace(FId, TSC, NewRoot);
279 return;
280 }
281
282 auto &Top = ShadowStack.back();
283 auto TopNode = Top.NodePtr;
284
285 // If we've seen this callee before, then we just access that node and place
286 // that on the top of the stack.
287 auto Callee = TopNode->Callees.find_element(
288 [FId](const NodeIdPair &NR) { return NR.FId == FId; });
289 if (Callee != nullptr) {
290 CHECK_NE(Callee->NodePtr, nullptr);
291 ShadowStack.AppendEmplace(FId, TSC, Callee->NodePtr);
292 return;
293 }
294
295 // This means we've never seen this stack before, create a new node here.
296 auto NewNode =
297 Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId);
298 if (UNLIKELY(NewNode == nullptr))
299 return;
300 TopNode->Callees.AppendEmplace(NewNode, FId);
301 ShadowStack.AppendEmplace(FId, TSC, NewNode);
302 return;
303 }
304
305 void exitFunction(int32_t FId, uint64_t TSC) {
306 // When we exit a function, we look up the ShadowStack to see whether we've
307 // entered this function before. We do as little processing here as we can,
308 // since most of the hard work would have already been done at function
309 // entry.
310 if (UNLIKELY(ShadowStack.empty()))
311 return;
312
313 uint64_t CumulativeTreeTime = 0;
314 while (!ShadowStack.empty()) {
315 auto &Top = ShadowStack.back();
316 auto TopNode = Top.NodePtr;
317 auto TopFId = TopNode->FId;
318 auto LocalTime = TSC - Top.EntryTSC;
319 TopNode->CallCount++;
320 TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
321 CumulativeTreeTime += LocalTime;
322 ShadowStack.trim(1);
323
324 // TODO: Update the histogram for the node.
325 if (TopFId == FId)
326 break;
327 }
328 }
329
330 const RootArray &getRoots() const { return Roots; }
331
332 // The deepCopyInto operation will update the provided FunctionCallTrie by
333 // re-creating the contents of this particular FunctionCallTrie in the other
334 // FunctionCallTrie. It will do this using a Depth First Traversal from the
335 // roots, and while doing so recreating the traversal in the provided
336 // FunctionCallTrie.
337 //
338 // This operation will *not* destroy the state in `O`, and thus may cause some
339 // duplicate entries in `O` if it is not empty.
340 //
341 // This function is *not* thread-safe, and may require external
342 // synchronisation of both "this" and |O|.
343 //
344 // This function must *not* be called with a non-empty FunctionCallTrie |O|.
345 void deepCopyInto(FunctionCallTrie &O) const {
346 DCHECK(O.getRoots().empty());
347 for (const auto Root : getRoots()) {
348 // Add a node in O for this root.
349 auto NewRoot = O.Nodes.AppendEmplace(
350 nullptr, *O.NodeIdPairAllocator, Root->CallCount,
351 Root->CumulativeLocalTime, Root->FId);
352 O.Roots.Append(NewRoot);
353
354 // We then push the root into a stack, to use as the parent marker for new
355 // nodes we push in as we're traversing depth-first down the call tree.
356 struct NodeAndParent {
357 FunctionCallTrie::Node *Node;
358 FunctionCallTrie::Node *NewNode;
359 };
360 using Stack = Array<NodeAndParent>;
361
362 typename Stack::AllocatorType StackAllocator(
363 profilerFlags()->stack_allocator_max, 0);
364 Stack DFSStack(StackAllocator);
365
366 // TODO: Figure out what to do if we fail to allocate any more stack
367 // space. Maybe warn or report once?
368 DFSStack.Append(NodeAndParent{Root, NewRoot});
369 while (!DFSStack.empty()) {
370 NodeAndParent NP = DFSStack.back();
371 DCHECK_NE(NP.Node, nullptr);
372 DCHECK_NE(NP.NewNode, nullptr);
373 DFSStack.trim(1);
374 for (const auto Callee : NP.Node->Callees) {
375 auto NewNode = O.Nodes.AppendEmplace(
376 NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
377 Callee.NodePtr->CumulativeLocalTime, Callee.FId);
378 DCHECK_NE(NewNode, nullptr);
379 NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
380 DFSStack.Append(NodeAndParent{Callee.NodePtr, NewNode});
381 }
382 }
383 }
384 }
385
386 // The mergeInto operation will update the provided FunctionCallTrie by
387 // traversing the current trie's roots and updating (i.e. merging) the data in
388 // the nodes with the data in the target's nodes. If the node doesn't exist in
389 // the provided trie, we add a new one in the right position, and inherit the
390 // data from the original (current) trie, along with all its callees.
391 //
392 // This function is *not* thread-safe, and may require external
393 // synchronisation of both "this" and |O|.
394 void mergeInto(FunctionCallTrie &O) const {
395 struct NodeAndTarget {
396 FunctionCallTrie::Node *OrigNode;
397 FunctionCallTrie::Node *TargetNode;
398 };
399 using Stack = Array<NodeAndTarget>;
400 typename Stack::AllocatorType StackAllocator(
401 profilerFlags()->stack_allocator_max, 0);
402 Stack DFSStack(StackAllocator);
403
404 for (const auto Root : getRoots()) {
405 Node *TargetRoot = nullptr;
406 auto R = O.Roots.find_element(
407 [&](const Node *Node) { return Node->FId == Root->FId; });
408 if (R == nullptr) {
409 TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0,
410 0, Root->FId);
411 O.Roots.Append(TargetRoot);
412 } else {
413 TargetRoot = *R;
414 }
415
416 DFSStack.Append(NodeAndTarget{Root, TargetRoot});
417 while (!DFSStack.empty()) {
418 NodeAndTarget NT = DFSStack.back();
419 DCHECK_NE(NT.OrigNode, nullptr);
420 DCHECK_NE(NT.TargetNode, nullptr);
421 DFSStack.trim(1);
422 // TODO: Update the histogram as well when we have it ready.
423 NT.TargetNode->CallCount += NT.OrigNode->CallCount;
424 NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
425 for (const auto Callee : NT.OrigNode->Callees) {
426 auto TargetCallee = NT.TargetNode->Callees.find_element(
427 [&](const FunctionCallTrie::NodeIdPair &C) {
428 return C.FId == Callee.FId;
429 });
430 if (TargetCallee == nullptr) {
431 auto NewTargetNode = O.Nodes.AppendEmplace(
432 NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId);
433 TargetCallee =
434 NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
435 }
436 DFSStack.Append(NodeAndTarget{Callee.NodePtr, TargetCallee->NodePtr});
437 }
438 }
439 }
440 }
441};
442
443} // namespace __xray
444
445#endif // XRAY_FUNCTION_CALL_TRIE_H