//===--------------------- SummaryView.cpp -------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the functionality used by the SummaryView to print
/// the report information.
///
//===----------------------------------------------------------------------===//
#include "SummaryView.h"
#include "Support.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Format.h"
namespace mca {
#define DEBUG_TYPE "llvm-mca"
using namespace llvm;
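
// Updates the summary statistics (micro opcode count and per-resource cycle
// usage) when an instruction from the first simulated iteration is
// dispatched.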
void SummaryView::onInstructionEvent(const HWInstructionEvent &Event) {
  // We are only interested in the "instruction dispatched" events generated by
  // the dispatch stage for instructions that are part of iteration #0.
  if (Event.Type != HWInstructionEvent::Dispatched)
    return;
  if (Event.IR.getSourceIndex() >= Source.size())
    return;
  // Update the cumulative number of resource cycles based on the processor
  // resource usage information available from the instruction descriptor. We
  // need to compute the cumulative number of resource cycles for every
  // processor resource which is consumed by an instruction of the block.
  const Instruction &Inst = *Event.IR.getInstruction();
  const InstrDesc &Desc = Inst.getDesc();
  NumMicroOps += Desc.NumMicroOps;
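  // Each entry in Desc.Resources maps a processor resource mask to its usage;
  // size() returns the number of resource cycles consumed on that resource.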
  for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) {
    if (!RU.second.size())
      continue;
    assert(RU.second.NumUnits && "Expected at least one unit used!");
    if (ProcResourceUsage.find(RU.first) == ProcResourceUsage.end()) {
      ProcResourceUsage[RU.first] = RU.second.size();
      continue;
    }
    ProcResourceUsage[RU.first] += RU.second.size();
  }
}

double SummaryView::getBlockRThroughput() const {
  assert(NumMicroOps && "Expected at least one micro opcode!");
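  // Each processor resource declared by the scheduling model is identified by
  // a bit mask; the masks computed here are used below as keys into
  // ProcResourceUsage.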
  SmallVector<uint64_t, 8> Masks(SM.getNumProcResourceKinds());
  computeProcResourceMasks(SM, Masks);

  // The block throughput is bounded from above by the hardware dispatch
  // throughput. That is because the DispatchWidth is an upper bound on the
  // number of micro opcodes that can be part of a single dispatch group.
  double Max = static_cast<double>(NumMicroOps) / DispatchWidth;

  // The block throughput is also limited by the amount of hardware parallelism.
  // The number of available resource units affects the resource pressure
  // distribution, as well as how many blocks can be executed every cycle.
  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
    uint64_t Mask = Masks[I];
    const auto It = ProcResourceUsage.find_as(Mask);
    if (It != ProcResourceUsage.end()) {
      const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
      unsigned NumUnits = MCDesc.NumUnits;
      double Throughput = static_cast<double>(It->second) / NumUnits;
      Max = std::max(Max, Throughput);
    }
  }

  // The block reciprocal throughput is computed as the MAX of:
  // - (#uOps / DispatchWidth)
  // - (resource cycles / #units) for every consumed processor resource.
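  // Example: on a 2-wide machine, a block of 4 uOps that also consumes 3
  // cycles of a single-unit resource has a reciprocal throughput of
  // max(4 / 2, 3 / 1) = 3 cycles per iteration.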
  return Max;
}
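
// Prints the summary report: iterations, instruction count, total cycles,
// dispatch width, IPC, and the block reciprocal throughput.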
void SummaryView::printView(raw_ostream &OS) const {
  unsigned Iterations = Source.getNumIterations();
  unsigned Instructions = Source.size();
  unsigned TotalInstructions = Instructions * Iterations;
  double IPC = (double)TotalInstructions / TotalCycles;
  double BlockRThroughput = getBlockRThroughput();

  std::string Buffer;
  raw_string_ostream TempStream(Buffer);
  TempStream << "Iterations: " << Iterations;
  TempStream << "\nInstructions: " << TotalInstructions;
  TempStream << "\nTotal Cycles: " << TotalCycles;
  TempStream << "\nDispatch Width: " << DispatchWidth;
  TempStream << "\nIPC: " << format("%.2f", IPC);
  TempStream << "\nBlock RThroughput: " << format("%.1f", BlockRThroughput)
             << '\n';
  TempStream.flush();
  OS << Buffer;
}
} // namespace mca