//===--------------------- SummaryView.cpp -------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the functionality used by the SummaryView to print
/// the report information.
///
//===----------------------------------------------------------------------===//
#include "SummaryView.h"
#include "Support.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Format.h"
namespace mca {
#define DEBUG_TYPE "llvm-mca"
using namespace llvm;
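
// Updates the summary statistics (micro opcode count and per-resource cycle
// usage) when an instruction from the first simulated iteration is
// dispatched.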
void SummaryView::onInstructionEvent(const HWInstructionEvent &Event) {
  // We are only interested in the "instruction dispatched" events generated by
  // the dispatch stage for instructions that are part of iteration #0.
  if (Event.Type != HWInstructionEvent::Dispatched)
    return;
  if (Event.IR.getSourceIndex() >= Source.size())
    return;
  // Update the cumulative number of resource cycles based on the processor
  // resource usage information available from the instruction descriptor. We
  // need to compute the cumulative number of resource cycles for every
  // processor resource which is consumed by an instruction of the block.
  const Instruction &Inst = *Event.IR.getInstruction();
  const InstrDesc &Desc = Inst.getDesc();
  NumMicroOps += Desc.NumMicroOps;
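  // Each entry in Desc.Resources maps a processor resource mask to its usage;
  // size() returns the number of resource cycles consumed on that resource.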
  for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) {
    if (!RU.second.size())
      continue;
    assert(RU.second.NumUnits && "Expected at least one unit used!");
    if (ProcResourceUsage.find(RU.first) == ProcResourceUsage.end()) {
      ProcResourceUsage[RU.first] = RU.second.size();
      continue;
    }
    ProcResourceUsage[RU.first] += RU.second.size();
  }
}

double SummaryView::getBlockRThroughput() const {
  assert(NumMicroOps && "Expected at least one micro opcode!");
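  // Each processor resource declared by the scheduling model is identified by
  // a bit mask; the masks computed here are used below as keys into
  // ProcResourceUsage.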
  SmallVector<uint64_t, 8> Masks(SM.getNumProcResourceKinds());
  computeProcResourceMasks(SM, Masks);

  // The block throughput is bounded from above by the hardware dispatch
  // throughput. That is because the DispatchWidth is an upper bound on the
  // number of micro opcodes that can be part of a single dispatch group.
  double Max = static_cast<double>(NumMicroOps) / DispatchWidth;

  // The block throughput is also limited by the amount of hardware parallelism.
  // The number of available resource units affects the resource pressure
  // distribution, as well as how many blocks can be executed every cycle.
  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
    uint64_t Mask = Masks[I];
    const auto It = ProcResourceUsage.find_as(Mask);
    if (It != ProcResourceUsage.end()) {
      const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
      unsigned NumUnits = MCDesc.NumUnits;
      double Throughput = static_cast<double>(It->second) / NumUnits;
      Max = std::max(Max, Throughput);
    }
  }

  // The block reciprocal throughput is computed as the MAX of:
  // - (#uOps / DispatchWidth)
  // - (resource cycles / #units) for every consumed processor resource.
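  // Example: on a 2-wide machine, a block of 4 uOps that also consumes 3
  // cycles of a single-unit resource has a reciprocal throughput of
  // max(4 / 2, 3 / 1) = 3 cycles per iteration.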
  return Max;
}
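
// Prints the summary report: iterations, instruction count, total cycles,
// dispatch width, IPC, and the block reciprocal throughput.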
void SummaryView::printView(raw_ostream &OS) const {
  unsigned Iterations = Source.getNumIterations();
  unsigned Instructions = Source.size();
  unsigned TotalInstructions = Instructions * Iterations;
  double IPC = (double)TotalInstructions / TotalCycles;
  double BlockRThroughput = getBlockRThroughput();

  std::string Buffer;
  raw_string_ostream TempStream(Buffer);
  TempStream << "Iterations: " << Iterations;
  TempStream << "\nInstructions: " << TotalInstructions;
  TempStream << "\nTotal Cycles: " << TotalCycles;
  TempStream << "\nDispatch Width: " << DispatchWidth;
  TempStream << "\nIPC: " << format("%.2f", IPC);
  TempStream << "\nBlock RThroughput: " << format("%.1f", BlockRThroughput)
             << '\n';
  TempStream.flush();
  OS << Buffer;
}
} // namespace mca