blob: c4746c62c0dde01b1766d7bc03be8e6337f46498 [file] [log] [blame]
//===--------------------- SummaryView.cpp -------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the functionalities used by the SummaryView to print
/// the report information.
///
//===----------------------------------------------------------------------===//
15
16#include "SummaryView.h"
Andrea Di Biagio3fc20c92018-05-23 15:59:27 +000017#include "Support.h"
18#include "llvm/ADT/SmallVector.h"
Andrea Di Biagiodf5d9482018-03-23 19:40:04 +000019#include "llvm/Support/Format.h"
Andrea Di Biagio0cc66c72018-03-09 13:52:03 +000020
21namespace mca {
22
Andrea Di Biagiodf5d9482018-03-23 19:40:04 +000023#define DEBUG_TYPE "llvm-mca"
24
Andrea Di Biagio0cc66c72018-03-09 13:52:03 +000025using namespace llvm;
26
Andrea Di Biagio3fc20c92018-05-23 15:59:27 +000027void SummaryView::onInstructionEvent(const HWInstructionEvent &Event) {
28 // We are only interested in the "instruction dispatched" events generated by
29 // the dispatch stage for instructions that are part of iteration #0.
30 if (Event.Type != HWInstructionEvent::Dispatched)
31 return;
32
33 if (Event.IR.getSourceIndex() >= Source.size())
34 return;
35
36 // Update the cumulative number of resource cycles based on the processor
Andrea Di Biagio0af81152018-05-24 17:22:14 +000037 // resource usage information available from the instruction descriptor. We
38 // need to compute the cumulative number of resource cycles for every
39 // processor resource which is consumed by an instruction of the block.
Andrea Di Biagio3fc20c92018-05-23 15:59:27 +000040 const Instruction &Inst = *Event.IR.getInstruction();
41 const InstrDesc &Desc = Inst.getDesc();
42 NumMicroOps += Desc.NumMicroOps;
43 for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) {
44 if (!RU.second.size())
45 continue;
46
47 assert(RU.second.NumUnits && "Expected more than one unit used!");
48 if (ProcResourceUsage.find(RU.first) == ProcResourceUsage.end()) {
49 ProcResourceUsage[RU.first] = RU.second.size();
50 continue;
51 }
52
53 ProcResourceUsage[RU.first] += RU.second.size();
54 }
55}
56
57double SummaryView::getBlockRThroughput() const {
58 assert(NumMicroOps && "Expected at least one micro opcode!");
59
60 SmallVector<uint64_t, 8> Masks(SM.getNumProcResourceKinds());
61 computeProcResourceMasks(SM, Masks);
62
63 // The block throughput is bounded from above by the hardware dispatch
64 // throughput. That is because the DispatchWidth is an upper bound on the
65 // number of opcodes that can be part of a single dispatch group.
66 double Max = static_cast<double>(NumMicroOps) / DispatchWidth;
67
68 // The block throughput is also limited by the amount of hardware parallelism.
69 // The number of available resource units affects the resource pressure
70 // distributed, as well as how many blocks can be executed every cycle.
71 for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
72 uint64_t Mask = Masks[I];
73 const auto It = ProcResourceUsage.find_as(Mask);
74 if (It != ProcResourceUsage.end()) {
75 const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
76 unsigned NumUnits = MCDesc.NumUnits;
77 double Throughput = static_cast<double>(It->second) / NumUnits;
78 Max = std::max(Max, Throughput);
79 }
80 }
81
82 // The block reciprocal throughput is computed as the MAX of:
83 // - (#uOps / DispatchWidth)
84 // - (#units / resource cycles) for every consumed processor resource.
85 return Max;
86}
87
Andrea Di Biagiodf5d9482018-03-23 19:40:04 +000088void SummaryView::printView(raw_ostream &OS) const {
Andrea Di Biagiob5229752018-03-13 17:24:32 +000089 unsigned Iterations = Source.getNumIterations();
90 unsigned Instructions = Source.size();
Andrea Di Biagio0cc66c72018-03-09 13:52:03 +000091 unsigned TotalInstructions = Instructions * Iterations;
92 double IPC = (double)TotalInstructions / TotalCycles;
Andrea Di Biagio3fc20c92018-05-23 15:59:27 +000093 double BlockRThroughput = getBlockRThroughput();
Andrea Di Biagio0cc66c72018-03-09 13:52:03 +000094
95 std::string Buffer;
96 raw_string_ostream TempStream(Buffer);
Andrea Di Biagio3fc20c92018-05-23 15:59:27 +000097 TempStream << "Iterations: " << Iterations;
98 TempStream << "\nInstructions: " << TotalInstructions;
99 TempStream << "\nTotal Cycles: " << TotalCycles;
100 TempStream << "\nDispatch Width: " << DispatchWidth;
101 TempStream << "\nIPC: " << format("%.2f", IPC);
Andrea Di Biagio0af81152018-05-24 17:22:14 +0000102
103 // Round to the block reciprocal throughput to the nearest tenth.
104 TempStream << "\nBlock RThroughput: "
105 << format("%.1f", floor((BlockRThroughput * 10) + 0.5) / 10)
Andrea Di Biagio3fc20c92018-05-23 15:59:27 +0000106 << '\n';
Andrea Di Biagio0cc66c72018-03-09 13:52:03 +0000107 TempStream.flush();
108 OS << Buffer;
109}
Andrea Di Biagio0cc66c72018-03-09 13:52:03 +0000110} // namespace mca.