| //===--------------------- SummaryView.cpp -------------------*- C++ -*-===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| /// \file |
| /// |
| /// This file implements the functionalities used by the SummaryView to print |
| /// the report information. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #include "SummaryView.h" |
| #include "Support.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/Support/Format.h" |
| |
| namespace mca { |
| |
| #define DEBUG_TYPE "llvm-mca" |
| |
| using namespace llvm; |
| |
| void SummaryView::onInstructionEvent(const HWInstructionEvent &Event) { |
| // We are only interested in the "instruction dispatched" events generated by |
| // the dispatch stage for instructions that are part of iteration #0. |
| if (Event.Type != HWInstructionEvent::Dispatched) |
| return; |
| |
| if (Event.IR.getSourceIndex() >= Source.size()) |
| return; |
| |
| // Update the cumulative number of resource cycles based on the processor |
| // resource usage information available from the instruction descriptor. We need to |
| // compute the cumulative number of resource cycles for every processor |
| // resource which is consumed by an instruction of the block. |
| const Instruction &Inst = *Event.IR.getInstruction(); |
| const InstrDesc &Desc = Inst.getDesc(); |
| NumMicroOps += Desc.NumMicroOps; |
| for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) { |
| if (!RU.second.size()) |
| continue; |
| |
| assert(RU.second.NumUnits && "Expected more than one unit used!"); |
| if (ProcResourceUsage.find(RU.first) == ProcResourceUsage.end()) { |
| ProcResourceUsage[RU.first] = RU.second.size(); |
| continue; |
| } |
| |
| ProcResourceUsage[RU.first] += RU.second.size(); |
| } |
| } |
| |
| double SummaryView::getBlockRThroughput() const { |
| assert(NumMicroOps && "Expected at least one micro opcode!"); |
| |
| SmallVector<uint64_t, 8> Masks(SM.getNumProcResourceKinds()); |
| computeProcResourceMasks(SM, Masks); |
| |
| // The block throughput is bounded from above by the hardware dispatch |
| // throughput. That is because the DispatchWidth is an upper bound on the |
| // number of opcodes that can be part of a single dispatch group. |
| double Max = static_cast<double>(NumMicroOps) / DispatchWidth; |
| |
| // The block throughput is also limited by the amount of hardware parallelism. |
| // The number of available resource units affects the resource pressure |
| // distributed, as well as how many blocks can be executed every cycle. |
| for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { |
| uint64_t Mask = Masks[I]; |
| const auto It = ProcResourceUsage.find_as(Mask); |
| if (It != ProcResourceUsage.end()) { |
| const MCProcResourceDesc &MCDesc = *SM.getProcResource(I); |
| unsigned NumUnits = MCDesc.NumUnits; |
| double Throughput = static_cast<double>(It->second) / NumUnits; |
| Max = std::max(Max, Throughput); |
| } |
| } |
| |
| // The block reciprocal throughput is computed as the MAX of: |
| // - (#uOps / DispatchWidth) |
| // - (#units / resource cycles) for every consumed processor resource. |
| return Max; |
| } |
| |
| void SummaryView::printView(raw_ostream &OS) const { |
| unsigned Iterations = Source.getNumIterations(); |
| unsigned Instructions = Source.size(); |
| unsigned TotalInstructions = Instructions * Iterations; |
| double IPC = (double)TotalInstructions / TotalCycles; |
| double BlockRThroughput = getBlockRThroughput(); |
| |
| std::string Buffer; |
| raw_string_ostream TempStream(Buffer); |
| TempStream << "Iterations: " << Iterations; |
| TempStream << "\nInstructions: " << TotalInstructions; |
| TempStream << "\nTotal Cycles: " << TotalCycles; |
| TempStream << "\nDispatch Width: " << DispatchWidth; |
| TempStream << "\nIPC: " << format("%.2f", IPC); |
| TempStream << "\nBlock RThroughput: " << format("%.1f", BlockRThroughput) |
| << '\n'; |
| TempStream.flush(); |
| OS << Buffer; |
| } |
| } // namespace mca. |