blob: fd5df82e093db21c52d61df4f645412262b18dd4 [file] [log] [blame]
Keith Wyss9420ec32017-09-07 18:07:48 +00001//===- xray-stacks.cc - XRay Function Call Stack Accounting ---------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements stack-based accounting. It takes XRay traces, and
11// collates statistics across these traces to show a breakdown of time spent
12// at various points of the stack to provide insight into which functions
13// spend the most time in terms of a call stack. We provide a few
14// sorting/filtering options for zero'ing in on the useful stacks.
15//
16//===----------------------------------------------------------------------===//
17
18#include <forward_list>
19#include <numeric>
20
21#include "func-id-helper.h"
22#include "xray-registry.h"
23#include "llvm/ADT/StringExtras.h"
24#include "llvm/Support/CommandLine.h"
25#include "llvm/Support/Errc.h"
26#include "llvm/Support/ErrorHandling.h"
27#include "llvm/Support/FormatAdapters.h"
28#include "llvm/Support/FormatVariadic.h"
29#include "llvm/XRay/Graph.h"
30#include "llvm/XRay/InstrumentationMap.h"
31#include "llvm/XRay/Trace.h"
32
33using namespace llvm;
34using namespace llvm::xray;
35
36static cl::SubCommand Stack("stack", "Call stack accounting");
37static cl::list<std::string> StackInputs(cl::Positional,
38 cl::desc("<xray trace>"), cl::Required,
39 cl::sub(Stack), cl::OneOrMore);
40
41static cl::opt<bool>
42 StackKeepGoing("keep-going", cl::desc("Keep going on errors encountered"),
43 cl::sub(Stack), cl::init(false));
44static cl::alias StackKeepGoing2("k", cl::aliasopt(StackKeepGoing),
45 cl::desc("Alias for -keep-going"),
46 cl::sub(Stack));
47
48// TODO: Does there need to be an option to deduce tail or sibling calls?
49
50static cl::opt<std::string> StacksInstrMap(
51 "instr_map",
52 cl::desc("instrumentation map used to identify function ids. "
53 "Currently supports elf file instrumentation maps."),
54 cl::sub(Stack), cl::init(""));
55static cl::alias StacksInstrMap2("m", cl::aliasopt(StacksInstrMap),
56 cl::desc("Alias for -instr_map"),
57 cl::sub(Stack));
58
59static cl::opt<bool>
60 SeparateThreadStacks("per-thread-stacks",
61 cl::desc("Report top stacks within each thread id"),
62 cl::sub(Stack), cl::init(false));
63
64static cl::opt<bool>
65 AggregateThreads("aggregate-threads",
66 cl::desc("Aggregate stack times across threads"),
67 cl::sub(Stack), cl::init(false));
68
Keith Wyssdef3aa02017-10-12 22:47:42 +000069static cl::opt<bool>
70 DumpAllStacks("all-stacks",
71 cl::desc("Dump sum of timings for all stacks. "
72 "By default separates stacks per-thread."),
73 cl::sub(Stack), cl::init(false));
74static cl::alias DumpAllStacksShort("all", cl::aliasopt(DumpAllStacks),
75 cl::desc("Alias for -all-stacks"),
76 cl::sub(Stack));
77
78// TODO(kpw): Add other interesting formats. Perhaps chrome trace viewer format
79// possibly with aggregations or just a linear trace of timings.
80enum StackOutputFormat { HUMAN, FLAMETOOL };
81
82static cl::opt<StackOutputFormat> StacksOutputFormat(
83 "stack-format",
84 cl::desc("The format that output stacks should be "
85 "output in. Only applies with all-stacks."),
86 cl::values(
87 clEnumValN(HUMAN, "human",
88 "Human readable output. Only valid without -all-stacks."),
89 clEnumValN(FLAMETOOL, "flame",
90 "Format consumable by Brendan Gregg's FlameGraph tool. "
91 "Only valid with -all-stacks.")),
92 cl::sub(Stack), cl::init(HUMAN));
93
94// Types of values for each stack in a CallTrie.
95enum class AggregationType {
96 TOTAL_TIME, // The total time spent in a stack and its callees.
97 INVOCATION_COUNT // The number of times the stack was invoked.
98};
99
100static cl::opt<AggregationType> RequestedAggregation(
101 "aggregation-type",
102 cl::desc("The type of aggregation to do on call stacks."),
103 cl::values(
104 clEnumValN(
105 AggregationType::TOTAL_TIME, "time",
106 "Capture the total time spent in an all invocations of a stack."),
107 clEnumValN(AggregationType::INVOCATION_COUNT, "count",
108 "Capture the number of times a stack was invoked. "
109 "In flamegraph mode, this count also includes invocations "
110 "of all callees.")),
111 cl::sub(Stack), cl::init(AggregationType::TOTAL_TIME));
112
113/// A helper struct to work with formatv and XRayRecords. Makes it easier to
114/// use instrumentation map names or addresses in formatted output.
Keith Wyss9420ec32017-09-07 18:07:48 +0000115struct format_xray_record : public FormatAdapter<XRayRecord> {
116 explicit format_xray_record(XRayRecord record,
117 const FuncIdConversionHelper &conv)
118 : FormatAdapter<XRayRecord>(std::move(record)), Converter(&conv) {}
119 void format(raw_ostream &Stream, StringRef Style) override {
120 Stream << formatv(
121 "{FuncId: \"{0}\", ThreadId: \"{1}\", RecordType: \"{2}\"}",
122 Converter->SymbolOrNumber(Item.FuncId), Item.TId,
123 DecodeRecordType(Item.RecordType));
124 }
125
126private:
127 Twine DecodeRecordType(uint16_t recordType) {
128 switch (recordType) {
129 case 0:
130 return Twine("Fn Entry");
131 case 1:
132 return Twine("Fn Exit");
133 default:
134 // TODO: Add Tail exit when it is added to llvm/XRay/XRayRecord.h
135 return Twine("Unknown");
136 }
137 }
138
139 const FuncIdConversionHelper *Converter;
140};
141
142/// The stack command will take a set of XRay traces as arguments, and collects
143/// information about the stacks of instrumented functions that appear in the
144/// traces. We track the following pieces of information:
145///
146/// - Total time: amount of time/cycles accounted for in the traces.
147/// - Stack count: number of times a specific stack appears in the
148/// traces. Only instrumented functions show up in stacks.
149/// - Cumulative stack time: amount of time spent in a stack accumulated
150/// across the invocations in the traces.
151/// - Cumulative local time: amount of time spent in each instrumented
152/// function showing up in a specific stack, accumulated across the traces.
153///
154/// Example output for the kind of data we'd like to provide looks like the
155/// following:
156///
157/// Total time: 3.33234 s
158/// Stack ID: ...
159/// Stack Count: 2093
160/// # Function Local Time (%) Stack Time (%)
161/// 0 main 2.34 ms 0.07% 3.33234 s 100%
162/// 1 foo() 3.30000 s 99.02% 3.33 s 99.92%
163/// 2 bar() 30 ms 0.90% 30 ms 0.90%
164///
165/// We can also show distributions of the function call durations with
166/// statistics at each level of the stack. This works by doing the following
167/// algorithm:
168///
169/// 1. When unwinding, record the duration of each unwound function associated
170/// with the path up to which the unwinding stops. For example:
171///
172/// Step Duration (? means has start time)
173///
174/// push a <start time> a = ?
175/// push b <start time> a = ?, a->b = ?
176/// push c <start time> a = ?, a->b = ?, a->b->c = ?
177/// pop c <end time> a = ?, a->b = ?, emit duration(a->b->c)
178/// pop b <end time> a = ?, emit duration(a->b)
179/// push c <start time> a = ?, a->c = ?
180/// pop c <end time> a = ?, emit duration(a->c)
181/// pop a <end time> emit duration(a)
182///
183/// 2. We then account for the various stacks we've collected, and for each of
184/// them will have measurements that look like the following (continuing
185/// with the above simple example):
186///
187/// c : [<id("a->b->c"), [durations]>, <id("a->c"), [durations]>]
188/// b : [<id("a->b"), [durations]>]
189/// a : [<id("a"), [durations]>]
190///
191/// This allows us to compute, for each stack id, and each function that
192/// shows up in the stack, some important statistics like:
193///
194/// - median
195/// - 99th percentile
196/// - mean + stddev
197/// - count
198///
199/// 3. For cases where we don't have durations for some of the higher levels
200/// of the stack (perhaps instrumentation wasn't activated when the stack was
201/// entered), we can mark them appropriately.
202///
203/// Computing this data also allows us to implement lookup by call stack nodes,
204/// so that we can find functions that show up in multiple stack traces and
205/// show the statistical properties of that function in various contexts. We
206/// can compute information similar to the following:
207///
208/// Function: 'c'
209/// Stacks: 2 / 2
210/// Stack ID: ...
211/// Stack Count: ...
212/// # Function ...
213/// 0 a ...
214/// 1 b ...
215/// 2 c ...
216///
217/// Stack ID: ...
218/// Stack Count: ...
219/// # Function ...
220/// 0 a ...
221/// 1 c ...
222/// ----------------...
223///
224/// Function: 'b'
225/// Stacks: 1 / 2
226/// Stack ID: ...
227/// Stack Count: ...
228/// # Function ...
229/// 0 a ...
230/// 1 b ...
231/// 2 c ...
232///
233///
234/// To do this we require a Trie data structure that will allow us to represent
235/// all the call stacks of instrumented functions in an easily traversible
236/// manner when we do the aggregations and lookups. For instrumented call
237/// sequences like the following:
238///
239/// a()
240/// b()
241/// c()
242/// d()
243/// c()
244///
245/// We will have a representation like so:
246///
247/// a -> b -> c
248/// | |
249/// | +--> d
250/// |
251/// +--> c
252///
253/// We maintain a sequence of durations on the leaves and in the internal nodes
254/// as we go through and process every record from the XRay trace. We also
255/// maintain an index of unique functions, and provide a means of iterating
256/// through all the instrumented call stacks which we know about.
257
258struct TrieNode {
259 int32_t FuncId;
260 TrieNode *Parent;
261 SmallVector<TrieNode *, 4> Callees;
262 // Separate durations depending on whether the node is the deepest node in the
263 // stack.
264 SmallVector<int64_t, 4> TerminalDurations;
265 SmallVector<int64_t, 4> IntermediateDurations;
266};
267
268/// Merges together two TrieNodes with like function ids, aggregating their
269/// callee lists and durations. The caller must provide storage where new merged
270/// nodes can be allocated in the form of a linked list.
271TrieNode *mergeTrieNodes(const TrieNode &Left, const TrieNode &Right,
272 TrieNode *NewParent,
273 std::forward_list<TrieNode> &NodeStore) {
274 assert(Left.FuncId == Right.FuncId);
275 NodeStore.push_front(TrieNode{Left.FuncId, NewParent, {}, {}, {}});
276 auto I = NodeStore.begin();
277 auto *Node = &*I;
278
279 // Build a map of callees from the left side.
280 DenseMap<int32_t, TrieNode *> LeftCalleesByFuncId;
281 for (auto *Callee : Left.Callees) {
282 LeftCalleesByFuncId[Callee->FuncId] = Callee;
283 }
284
285 // Iterate through the right side, either merging with the map values or
286 // directly adding to the Callees vector. The iteration also removes any
287 // merged values from the left side map.
288 for (auto *Callee : Right.Callees) {
289 auto iter = LeftCalleesByFuncId.find(Callee->FuncId);
290 if (iter != LeftCalleesByFuncId.end()) {
291 Node->Callees.push_back(
292 mergeTrieNodes(*(iter->second), *Callee, Node, NodeStore));
293 LeftCalleesByFuncId.erase(iter);
294 } else {
295 Node->Callees.push_back(Callee);
296 }
297 }
298
299 // Add any callees that weren't found in the right side.
300 for (auto MapPairIter : LeftCalleesByFuncId) {
301 Node->Callees.push_back(MapPairIter.second);
302 }
303
304 // Aggregate the durations.
305 for (auto duration : Left.TerminalDurations) {
306 Node->TerminalDurations.push_back(duration);
307 }
308 for (auto duration : Right.TerminalDurations) {
309 Node->TerminalDurations.push_back(duration);
310 }
311 for (auto duration : Left.IntermediateDurations) {
312 Node->IntermediateDurations.push_back(duration);
313 }
314 for (auto duration : Right.IntermediateDurations) {
315 Node->IntermediateDurations.push_back(duration);
316 }
317
318 return Node;
319}
320
Keith Wyssdef3aa02017-10-12 22:47:42 +0000321template <AggregationType AggType>
322std::size_t GetValueForStack(const TrieNode *Node);
323
324// When computing total time spent in a stack, we're adding the timings from
325// its callees and the timings from when it was a leaf.
326template <>
327std::size_t
328GetValueForStack<AggregationType::TOTAL_TIME>(const TrieNode *Node) {
329 auto TopSum = std::accumulate(Node->TerminalDurations.begin(),
330 Node->TerminalDurations.end(), 0uLL);
331 return std::accumulate(Node->IntermediateDurations.begin(),
332 Node->IntermediateDurations.end(), TopSum);
333}
334
335// Calculates how many times a function was invoked.
336// TODO: Hook up option to produce stacks
337template <>
338std::size_t
339GetValueForStack<AggregationType::INVOCATION_COUNT>(const TrieNode *Node) {
340 return Node->TerminalDurations.size() + Node->IntermediateDurations.size();
341}
342
343// Make sure there are implementations for each enum value.
344template <AggregationType T> struct DependentFalseType : std::false_type {};
345
346template <AggregationType AggType>
347std::size_t GetValueForStack(const TrieNode *Node) {
348 static_assert(DependentFalseType<AggType>::value,
349 "No implementation found for aggregation type provided.");
350 return 0;
351}
352
Keith Wyss9420ec32017-09-07 18:07:48 +0000353class StackTrie {
Keith Wyssdef3aa02017-10-12 22:47:42 +0000354 // Avoid the magic number of 4 propagated through the code with an alias.
355 // We use this SmallVector to track the root nodes in a call graph.
356 using RootVector = SmallVector<TrieNode *, 4>;
Keith Wyss9420ec32017-09-07 18:07:48 +0000357
358 // We maintain pointers to the roots of the tries we see.
Keith Wyssdef3aa02017-10-12 22:47:42 +0000359 DenseMap<uint32_t, RootVector> Roots;
Keith Wyss9420ec32017-09-07 18:07:48 +0000360
361 // We make sure all the nodes are accounted for in this list.
362 std::forward_list<TrieNode> NodeStore;
363
364 // A map of thread ids to pairs call stack trie nodes and their start times.
365 DenseMap<uint32_t, SmallVector<std::pair<TrieNode *, uint64_t>, 8>>
366 ThreadStackMap;
367
368 TrieNode *createTrieNode(uint32_t ThreadId, int32_t FuncId,
369 TrieNode *Parent) {
370 NodeStore.push_front(TrieNode{FuncId, Parent, {}, {}, {}});
371 auto I = NodeStore.begin();
372 auto *Node = &*I;
373 if (!Parent)
374 Roots[ThreadId].push_back(Node);
375 return Node;
376 }
377
378 TrieNode *findRootNode(uint32_t ThreadId, int32_t FuncId) {
379 const auto &RootsByThread = Roots[ThreadId];
380 auto I = find_if(RootsByThread,
381 [&](TrieNode *N) { return N->FuncId == FuncId; });
382 return (I == RootsByThread.end()) ? nullptr : *I;
383 }
384
385public:
386 enum class AccountRecordStatus {
387 OK, // Successfully processed
388 ENTRY_NOT_FOUND, // An exit record had no matching call stack entry
389 UNKNOWN_RECORD_TYPE
390 };
391
392 struct AccountRecordState {
393 // We keep track of whether the call stack is currently unwinding.
394 bool wasLastRecordExit;
395
396 static AccountRecordState CreateInitialState() { return {false}; }
397 };
398
399 AccountRecordStatus accountRecord(const XRayRecord &R,
400 AccountRecordState *state) {
401 auto &TS = ThreadStackMap[R.TId];
402 switch (R.Type) {
Martin Pelikan8b0cdbf2017-09-27 05:10:31 +0000403 case RecordTypes::ENTER:
404 case RecordTypes::ENTER_ARG: {
Keith Wyss9420ec32017-09-07 18:07:48 +0000405 state->wasLastRecordExit = false;
406 // When we encounter a new function entry, we want to record the TSC for
407 // that entry, and the function id. Before doing so we check the top of
408 // the stack to see if there are callees that already represent this
409 // function.
410 if (TS.empty()) {
411 auto *Root = findRootNode(R.TId, R.FuncId);
412 TS.emplace_back(Root ? Root : createTrieNode(R.TId, R.FuncId, nullptr),
413 R.TSC);
414 return AccountRecordStatus::OK;
415 }
416
417 auto &Top = TS.back();
418 auto I = find_if(Top.first->Callees,
419 [&](TrieNode *N) { return N->FuncId == R.FuncId; });
420 if (I == Top.first->Callees.end()) {
421 // We didn't find the callee in the stack trie, so we're going to
422 // add to the stack then set up the pointers properly.
423 auto N = createTrieNode(R.TId, R.FuncId, Top.first);
424 Top.first->Callees.emplace_back(N);
425
426 // Top may be invalidated after this statement.
427 TS.emplace_back(N, R.TSC);
428 } else {
429 // We found the callee in the stack trie, so we'll use that pointer
430 // instead, add it to the stack associated with the TSC.
431 TS.emplace_back(*I, R.TSC);
432 }
433 return AccountRecordStatus::OK;
434 }
Dean Michael Berris0f84a7d2017-09-18 06:08:46 +0000435 case RecordTypes::EXIT:
436 case RecordTypes::TAIL_EXIT: {
Keith Wyss9420ec32017-09-07 18:07:48 +0000437 bool wasLastRecordExit = state->wasLastRecordExit;
438 state->wasLastRecordExit = true;
439 // The exit case is more interesting, since we want to be able to deduce
440 // missing exit records. To do that properly, we need to look up the stack
441 // and see whether the exit record matches any of the entry records. If it
442 // does match, we attempt to record the durations as we pop the stack to
443 // where we see the parent.
444 if (TS.empty()) {
445 // Short circuit, and say we can't find it.
446
447 return AccountRecordStatus::ENTRY_NOT_FOUND;
448 }
449
450 auto FunctionEntryMatch =
451 find_if(reverse(TS), [&](const std::pair<TrieNode *, uint64_t> &E) {
452 return E.first->FuncId == R.FuncId;
453 });
454 auto status = AccountRecordStatus::OK;
455 if (FunctionEntryMatch == TS.rend()) {
456 status = AccountRecordStatus::ENTRY_NOT_FOUND;
457 } else {
458 // Account for offset of 1 between reverse and forward iterators. We
459 // want the forward iterator to include the function that is exited.
460 ++FunctionEntryMatch;
461 }
462 auto I = FunctionEntryMatch.base();
463 for (auto &E : make_range(I, TS.end() - 1))
464 E.first->IntermediateDurations.push_back(std::max(E.second, R.TSC) -
465 std::min(E.second, R.TSC));
466 auto &Deepest = TS.back();
467 if (wasLastRecordExit)
468 Deepest.first->IntermediateDurations.push_back(
469 std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC));
470 else
471 Deepest.first->TerminalDurations.push_back(
472 std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC));
473 TS.erase(I, TS.end());
474 return status;
475 }
476 }
477 return AccountRecordStatus::UNKNOWN_RECORD_TYPE;
478 }
479
480 bool isEmpty() const { return Roots.empty(); }
481
482 void printStack(raw_ostream &OS, const TrieNode *Top,
483 FuncIdConversionHelper &FN) {
484 // Traverse the pointers up to the parent, noting the sums, then print
485 // in reverse order (callers at top, callees down bottom).
486 SmallVector<const TrieNode *, 8> CurrentStack;
487 for (auto *F = Top; F != nullptr; F = F->Parent)
488 CurrentStack.push_back(F);
489 int Level = 0;
490 OS << formatv("{0,-5} {1,-60} {2,+12} {3,+16}\n", "lvl", "function",
491 "count", "sum");
492 for (auto *F :
493 reverse(make_range(CurrentStack.begin() + 1, CurrentStack.end()))) {
494 auto Sum = std::accumulate(F->IntermediateDurations.begin(),
495 F->IntermediateDurations.end(), 0LL);
496 auto FuncId = FN.SymbolOrNumber(F->FuncId);
497 OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++,
498 FuncId.size() > 60 ? FuncId.substr(0, 57) + "..." : FuncId,
499 F->IntermediateDurations.size(), Sum);
500 }
501 auto *Leaf = *CurrentStack.begin();
502 auto LeafSum = std::accumulate(Leaf->TerminalDurations.begin(),
503 Leaf->TerminalDurations.end(), 0LL);
504 auto LeafFuncId = FN.SymbolOrNumber(Leaf->FuncId);
505 OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++,
506 LeafFuncId.size() > 60 ? LeafFuncId.substr(0, 57) + "..."
507 : LeafFuncId,
508 Leaf->TerminalDurations.size(), LeafSum);
509 OS << "\n";
510 }
511
512 /// Prints top stacks for each thread.
513 void printPerThread(raw_ostream &OS, FuncIdConversionHelper &FN) {
514 for (auto iter : Roots) {
515 OS << "Thread " << iter.first << ":\n";
516 print(OS, FN, iter.second);
517 OS << "\n";
518 }
519 }
520
Keith Wyssdef3aa02017-10-12 22:47:42 +0000521 /// Prints timing sums for each stack in each threads.
522 template <AggregationType AggType>
523 void printAllPerThread(raw_ostream &OS, FuncIdConversionHelper &FN,
524 StackOutputFormat format) {
525 for (auto iter : Roots) {
526 uint32_t threadId = iter.first;
527 RootVector &perThreadRoots = iter.second;
528 bool reportThreadId = true;
529 printAll<AggType>(OS, FN, perThreadRoots, threadId, reportThreadId);
530 }
531 }
532
Keith Wyss9420ec32017-09-07 18:07:48 +0000533 /// Prints top stacks from looking at all the leaves and ignoring thread IDs.
534 /// Stacks that consist of the same function IDs but were called in different
535 /// thread IDs are not considered unique in this printout.
536 void printIgnoringThreads(raw_ostream &OS, FuncIdConversionHelper &FN) {
Keith Wyssdef3aa02017-10-12 22:47:42 +0000537 RootVector RootValues;
Keith Wyss9420ec32017-09-07 18:07:48 +0000538
539 // Function to pull the values out of a map iterator.
540 using RootsType = decltype(Roots.begin())::value_type;
541 auto MapValueFn = [](const RootsType &Value) { return Value.second; };
542
543 for (const auto &RootNodeRange :
544 make_range(map_iterator(Roots.begin(), MapValueFn),
545 map_iterator(Roots.end(), MapValueFn))) {
546 for (auto *RootNode : RootNodeRange)
547 RootValues.push_back(RootNode);
548 }
549
550 print(OS, FN, RootValues);
551 }
552
Keith Wyssdef3aa02017-10-12 22:47:42 +0000553 /// Creates a merged list of Tries for unique stacks that disregards their
554 /// thread IDs.
555 RootVector mergeAcrossThreads(std::forward_list<TrieNode> &NodeStore) {
556 RootVector MergedByThreadRoots;
Keith Wyss9420ec32017-09-07 18:07:48 +0000557 for (auto MapIter : Roots) {
558 const auto &RootNodeVector = MapIter.second;
559 for (auto *Node : RootNodeVector) {
Keith Wyssdef3aa02017-10-12 22:47:42 +0000560 auto MaybeFoundIter =
561 find_if(MergedByThreadRoots, [Node](TrieNode *elem) {
562 return Node->FuncId == elem->FuncId;
563 });
564 if (MaybeFoundIter == MergedByThreadRoots.end()) {
565 MergedByThreadRoots.push_back(Node);
Keith Wyss9420ec32017-09-07 18:07:48 +0000566 } else {
Keith Wyssdef3aa02017-10-12 22:47:42 +0000567 MergedByThreadRoots.push_back(
568 mergeTrieNodes(**MaybeFoundIter, *Node, nullptr, NodeStore));
569 MergedByThreadRoots.erase(MaybeFoundIter);
Keith Wyss9420ec32017-09-07 18:07:48 +0000570 }
571 }
572 }
Keith Wyssdef3aa02017-10-12 22:47:42 +0000573 return MergedByThreadRoots;
574 }
575
576 /// Print timing sums for all stacks merged by Thread ID.
577 template <AggregationType AggType>
578 void printAllAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN,
579 StackOutputFormat format) {
580 std::forward_list<TrieNode> AggregatedNodeStore;
581 RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore);
582 bool reportThreadId = false;
583 printAll<AggType>(OS, FN, MergedByThreadRoots,
584 /*threadId*/ 0, reportThreadId);
585 }
586
587 /// Merges the trie by thread id before printing top stacks.
588 void printAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN) {
589 std::forward_list<TrieNode> AggregatedNodeStore;
590 RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore);
591 print(OS, FN, MergedByThreadRoots);
592 }
593
594 // TODO: Add a format option when more than one are supported.
595 template <AggregationType AggType>
596 void printAll(raw_ostream &OS, FuncIdConversionHelper &FN,
597 RootVector RootValues, uint32_t ThreadId, bool ReportThread) {
598 SmallVector<const TrieNode *, 16> S;
599 for (const auto *N : RootValues) {
600 S.clear();
601 S.push_back(N);
602 while (!S.empty()) {
603 auto *Top = S.pop_back_val();
604 printSingleStack<AggType>(OS, FN, ReportThread, ThreadId, Top);
605 for (const auto *C : Top->Callees)
606 S.push_back(C);
607 }
608 }
609 }
610
611 /// Prints values for stacks in a format consumable for the flamegraph.pl
612 /// tool. This is a line based format that lists each level in the stack
613 /// hierarchy in a semicolon delimited form followed by a space and a numeric
614 /// value. If breaking down by thread, the thread ID will be added as the
615 /// root level of the stack.
616 template <AggregationType AggType>
617 void printSingleStack(raw_ostream &OS, FuncIdConversionHelper &Converter,
618 bool ReportThread, uint32_t ThreadId,
619 const TrieNode *Node) {
620 if (ReportThread)
621 OS << "thread_" << ThreadId << ";";
622 SmallVector<const TrieNode *, 5> lineage{};
623 lineage.push_back(Node);
624 while (lineage.back()->Parent != nullptr)
625 lineage.push_back(lineage.back()->Parent);
626 while (!lineage.empty()) {
627 OS << Converter.SymbolOrNumber(lineage.back()->FuncId) << ";";
628 lineage.pop_back();
629 }
630 OS << " " << GetValueForStack<AggType>(Node) << "\n";
Keith Wyss9420ec32017-09-07 18:07:48 +0000631 }
632
633 void print(raw_ostream &OS, FuncIdConversionHelper &FN,
Keith Wyssdef3aa02017-10-12 22:47:42 +0000634 RootVector RootValues) {
Keith Wyss9420ec32017-09-07 18:07:48 +0000635 // Go through each of the roots, and traverse the call stack, producing the
636 // aggregates as you go along. Remember these aggregates and stacks, and
637 // show summary statistics about:
638 //
639 // - Total number of unique stacks
640 // - Top 10 stacks by count
641 // - Top 10 stacks by aggregate duration
642 SmallVector<std::pair<const TrieNode *, uint64_t>, 11> TopStacksByCount;
643 SmallVector<std::pair<const TrieNode *, uint64_t>, 11> TopStacksBySum;
644 auto greater_second = [](const std::pair<const TrieNode *, uint64_t> &A,
645 const std::pair<const TrieNode *, uint64_t> &B) {
646 return A.second > B.second;
647 };
648 uint64_t UniqueStacks = 0;
649 for (const auto *N : RootValues) {
650 SmallVector<const TrieNode *, 16> S;
651 S.emplace_back(N);
652
653 while (!S.empty()) {
Keith Wyssdef3aa02017-10-12 22:47:42 +0000654 auto *Top = S.pop_back_val();
Keith Wyss9420ec32017-09-07 18:07:48 +0000655
656 // We only start printing the stack (by walking up the parent pointers)
657 // when we get to a leaf function.
658 if (!Top->TerminalDurations.empty()) {
659 ++UniqueStacks;
660 auto TopSum = std::accumulate(Top->TerminalDurations.begin(),
661 Top->TerminalDurations.end(), 0uLL);
662 {
663 auto E = std::make_pair(Top, TopSum);
664 TopStacksBySum.insert(std::lower_bound(TopStacksBySum.begin(),
665 TopStacksBySum.end(), E,
666 greater_second),
667 E);
668 if (TopStacksBySum.size() == 11)
669 TopStacksBySum.pop_back();
670 }
671 {
672 auto E = std::make_pair(Top, Top->TerminalDurations.size());
673 TopStacksByCount.insert(std::lower_bound(TopStacksByCount.begin(),
674 TopStacksByCount.end(), E,
675 greater_second),
676 E);
677 if (TopStacksByCount.size() == 11)
678 TopStacksByCount.pop_back();
679 }
680 }
681 for (const auto *C : Top->Callees)
682 S.push_back(C);
683 }
684 }
685
686 // Now print the statistics in the end.
687 OS << "\n";
688 OS << "Unique Stacks: " << UniqueStacks << "\n";
689 OS << "Top 10 Stacks by leaf sum:\n\n";
690 for (const auto &P : TopStacksBySum) {
691 OS << "Sum: " << P.second << "\n";
692 printStack(OS, P.first, FN);
693 }
694 OS << "\n";
695 OS << "Top 10 Stacks by leaf count:\n\n";
696 for (const auto &P : TopStacksByCount) {
697 OS << "Count: " << P.second << "\n";
698 printStack(OS, P.first, FN);
699 }
700 OS << "\n";
701 }
702};
703
704std::string CreateErrorMessage(StackTrie::AccountRecordStatus Error,
705 const XRayRecord &Record,
706 const FuncIdConversionHelper &Converter) {
707 switch (Error) {
708 case StackTrie::AccountRecordStatus::ENTRY_NOT_FOUND:
709 return formatv("Found record {0} with no matching function entry\n",
710 format_xray_record(Record, Converter));
711 default:
712 return formatv("Unknown error type for record {0}\n",
713 format_xray_record(Record, Converter));
714 }
715}
716
717static CommandRegistration Unused(&Stack, []() -> Error {
718 // Load each file provided as a command-line argument. For each one of them
719 // account to a single StackTrie, and just print the whole trie for now.
720 StackTrie ST;
721 InstrumentationMap Map;
722 if (!StacksInstrMap.empty()) {
723 auto InstrumentationMapOrError = loadInstrumentationMap(StacksInstrMap);
724 if (!InstrumentationMapOrError)
725 return joinErrors(
726 make_error<StringError>(
727 Twine("Cannot open instrumentation map: ") + StacksInstrMap,
728 std::make_error_code(std::errc::invalid_argument)),
729 InstrumentationMapOrError.takeError());
730 Map = std::move(*InstrumentationMapOrError);
731 }
732
733 if (SeparateThreadStacks && AggregateThreads)
734 return make_error<StringError>(
735 Twine("Can't specify options for per thread reporting and reporting "
736 "that aggregates threads."),
737 std::make_error_code(std::errc::invalid_argument));
738
Keith Wyssdef3aa02017-10-12 22:47:42 +0000739 if (!DumpAllStacks && StacksOutputFormat != HUMAN)
740 return make_error<StringError>(
741 Twine("Can't specify a non-human format without -all-stacks."),
742 std::make_error_code(std::errc::invalid_argument));
743
744 if (DumpAllStacks && StacksOutputFormat == HUMAN)
745 return make_error<StringError>(
746 Twine("You must specify a non-human format when reporting with "
747 "-all-stacks."),
748 std::make_error_code(std::errc::invalid_argument));
749
Keith Wyss9420ec32017-09-07 18:07:48 +0000750 symbolize::LLVMSymbolizer::Options Opts(
751 symbolize::FunctionNameKind::LinkageName, true, true, false, "");
752 symbolize::LLVMSymbolizer Symbolizer(Opts);
753 FuncIdConversionHelper FuncIdHelper(StacksInstrMap, Symbolizer,
754 Map.getFunctionAddresses());
755 // TODO: Someday, support output to files instead of just directly to
756 // standard output.
757 for (const auto &Filename : StackInputs) {
758 auto TraceOrErr = loadTraceFile(Filename);
759 if (!TraceOrErr) {
760 if (!StackKeepGoing)
761 return joinErrors(
762 make_error<StringError>(
763 Twine("Failed loading input file '") + Filename + "'",
764 std::make_error_code(std::errc::invalid_argument)),
765 TraceOrErr.takeError());
766 logAllUnhandledErrors(TraceOrErr.takeError(), errs(), "");
767 continue;
768 }
769 auto &T = *TraceOrErr;
770 StackTrie::AccountRecordState AccountRecordState =
771 StackTrie::AccountRecordState::CreateInitialState();
772 for (const auto &Record : T) {
773 auto error = ST.accountRecord(Record, &AccountRecordState);
774 if (error != StackTrie::AccountRecordStatus::OK) {
775 if (!StackKeepGoing)
776 return make_error<StringError>(
777 CreateErrorMessage(error, Record, FuncIdHelper),
778 make_error_code(errc::illegal_byte_sequence));
779 errs() << CreateErrorMessage(error, Record, FuncIdHelper);
780 }
781 }
782 }
783 if (ST.isEmpty()) {
784 return make_error<StringError>(
785 "No instrumented calls were accounted in the input file.",
786 make_error_code(errc::result_out_of_range));
787 }
Keith Wyssdef3aa02017-10-12 22:47:42 +0000788
789 // Report the stacks in a long form mode for another tool to analyze.
790 if (DumpAllStacks) {
791 if (AggregateThreads) {
792 switch (RequestedAggregation) {
793 case AggregationType::TOTAL_TIME:
794 ST.printAllAggregatingThreads<AggregationType::TOTAL_TIME>(
795 outs(), FuncIdHelper, StacksOutputFormat);
796 break;
797 case AggregationType::INVOCATION_COUNT:
798 ST.printAllAggregatingThreads<AggregationType::INVOCATION_COUNT>(
799 outs(), FuncIdHelper, StacksOutputFormat);
800 break;
Keith Wyssdef3aa02017-10-12 22:47:42 +0000801 }
802 } else {
803 switch (RequestedAggregation) {
804 case AggregationType::TOTAL_TIME:
805 ST.printAllPerThread<AggregationType::TOTAL_TIME>(outs(), FuncIdHelper,
806 StacksOutputFormat);
807 break;
808 case AggregationType::INVOCATION_COUNT:
809 ST.printAllPerThread<AggregationType::INVOCATION_COUNT>(
810 outs(), FuncIdHelper, StacksOutputFormat);
811 break;
Keith Wyssdef3aa02017-10-12 22:47:42 +0000812 }
813 }
814 return Error::success();
815 }
816
817 // We're only outputting top stacks.
Keith Wyss9420ec32017-09-07 18:07:48 +0000818 if (AggregateThreads) {
819 ST.printAggregatingThreads(outs(), FuncIdHelper);
820 } else if (SeparateThreadStacks) {
821 ST.printPerThread(outs(), FuncIdHelper);
822 } else {
823 ST.printIgnoringThreads(outs(), FuncIdHelper);
824 }
825 return Error::success();
826});