[llvm-mca] Add the ability to mark regions of code for analysis (PR36875)
This patch teaches llvm-mca how to parse code comments in search of special
"markers" used to select regions of code.
Example:
# LLVM-MCA-BEGIN My Code Region
....
# LLVM-MCA-END
The MCAsmLexer now delegates the parsing of code comments to an object of class
MCACommentConsumer (i.e. an AsmCommentConsumer), which searches them for
begin/end code region markers.
A comment starting with substring "LLVM-MCA-BEGIN" marks the beginning of a new
region of code. A comment starting with substring "LLVM-MCA-END" marks the end
of the last region.
This implementation doesn't allow regions to overlap. Each region can have an
optional description; internally, each region is identified by a range of source
code locations (SMLoc).
MCInst objects are added to a region R only if the source location for the
MCInst is in the range of locations specified by R.
By default, the tool allocates an implicit "Default" code region which contains
every source location. See new tests llvm-mca-markers-*.s for a few examples.
A new Backend object is created for every region. So, the analysis is conducted
on every parsed code region. The final report is the union of the reports
generated for every code region. Note that empty regions are skipped.
Special "[#] Code Region - ..." strings are used in the report to mark the
portion which is specific to a code region only. For example, see
llvm-mca-markers-5.s.
Differential Revision: https://reviews.llvm.org/D45433
llvm-svn: 329590
diff --git a/llvm/tools/llvm-mca/CMakeLists.txt b/llvm/tools/llvm-mca/CMakeLists.txt
index 2964fde..0a29f3b 100644
--- a/llvm/tools/llvm-mca/CMakeLists.txt
+++ b/llvm/tools/llvm-mca/CMakeLists.txt
@@ -13,6 +13,7 @@
Backend.cpp
BackendPrinter.cpp
BackendStatistics.cpp
+ CodeRegion.cpp
Dispatch.cpp
HWEventListener.cpp
InstrBuilder.cpp
diff --git a/llvm/tools/llvm-mca/CodeRegion.cpp b/llvm/tools/llvm-mca/CodeRegion.cpp
new file mode 100644
index 0000000..8968659
--- /dev/null
+++ b/llvm/tools/llvm-mca/CodeRegion.cpp
@@ -0,0 +1,66 @@
+//===-------------------------- CodeRegion.cpp -----------------*- C++ -* -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements methods from the CodeRegions interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "CodeRegion.h"
+
+using namespace llvm;
+
+namespace mca {
+
+/// Returns true if \p Loc lies within [RangeStart, RangeEnd].
+///
+/// A bound that is not a valid SMLoc is not checked, so a region with no valid
+/// bounds accepts every location.
+bool CodeRegion::isLocInRange(SMLoc Loc) const {
+  const char *Ptr = Loc.getPointer();
+  const bool PastEnd = RangeEnd.isValid() && Ptr > RangeEnd.getPointer();
+  const bool BeforeStart =
+      RangeStart.isValid() && Ptr < RangeStart.getPointer();
+  return !(PastEnd || BeforeStart);
+}
+
+/// Starts a new region described by \p Description at location \p Loc.
+///
+/// If the most recently added region has a valid start but no valid end, the
+/// request is rejected with a warning. If the most recently added region has
+/// no valid start location (as is the case for the "Default" region created by
+/// the constructor), the first region in the list is removed before the new
+/// one is added.
+void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) {
+  assert(!Regions.empty() && "Missing Default region");
+
+  // Read both flags up front so nothing refers to the last region after the
+  // list is modified below.
+  const CodeRegion &Last = *Regions.back();
+  const bool HasStart = Last.startLoc().isValid();
+  const bool HasEnd = Last.endLoc().isValid();
+
+  if (HasStart && !HasEnd) {
+    SM.PrintMessage(Loc, SourceMgr::DK_Warning,
+                    "Ignoring invalid region start");
+    return;
+  }
+
+  // Remove the default region if there are user defined regions.
+  if (!HasStart)
+    Regions.erase(Regions.begin());
+
+  addRegion(Description, Loc);
+}
+
+/// Sets \p Loc as the end location of the most recently added region.
+///
+/// If that region already has a valid end location, a warning is reported
+/// through the source manager and the region is left unchanged.
+void CodeRegions::endRegion(SMLoc Loc) {
+  assert(!Regions.empty() && "Missing Default region");
+
+  CodeRegion &Last = *Regions.back();
+  if (!Last.endLoc().isValid()) {
+    Last.setEndLocation(Loc);
+    return;
+  }
+
+  SM.PrintMessage(Loc, SourceMgr::DK_Warning, "Ignoring invalid region end");
+}
+
+/// Hands \p Instruction over to the region whose source range contains the
+/// instruction's location.
+///
+/// Regions are visited from the most recently added one backwards and the
+/// first match wins. If no region matches, the instruction is not stored.
+void CodeRegions::addInstruction(std::unique_ptr<const MCInst> Instruction) {
+  const SMLoc Loc = Instruction->getLoc();
+  for (auto I = Regions.rbegin(), E = Regions.rend(); I != E; ++I) {
+    if (!(*I)->isLocInRange(Loc))
+      continue;
+    (*I)->addInstruction(std::move(Instruction));
+    return;
+  }
+}
+
+} // namespace mca
diff --git a/llvm/tools/llvm-mca/CodeRegion.h b/llvm/tools/llvm-mca/CodeRegion.h
new file mode 100644
index 0000000..6dc3f01
--- /dev/null
+++ b/llvm/tools/llvm-mca/CodeRegion.h
@@ -0,0 +1,131 @@
+//===-------------------------- CodeRegion.h -------------------*- C++ -* -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines classes CodeRegion and CodeRegions.
+///
+/// A CodeRegion describes a region of assembly code guarded by special LLVM-MCA
+/// comment directives.
+///
+/// # LLVM-MCA-BEGIN foo
+/// ... ## asm
+/// # LLVM-MCA-END
+///
+/// A comment starting with substring LLVM-MCA-BEGIN marks the beginning of a
+/// new region of code.
+/// A comment starting with substring LLVM-MCA-END marks the end of the
+/// last-seen region of code.
+///
+/// Code regions are not allowed to overlap. Each region can have an optional
+/// description; internally, regions are described by a range of source
+/// locations (SMLoc objects).
+///
+/// An instruction (a MCInst) is added to a region R only if its location is in
+/// range [R.RangeStart, R.RangeEnd].
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_CODEREGION_H
+#define LLVM_TOOLS_LLVM_MCA_CODEREGION_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
+#include <vector>
+
+namespace mca {
+
+/// \brief A region of assembly code.
+///
+/// A region holds the machine instructions that were added to it, an optional
+/// description, and the [RangeStart, RangeEnd] source range used to decide
+/// whether a location belongs to it.
+class CodeRegion {
+  using InstVec = std::vector<std::unique_ptr<const llvm::MCInst>>;
+
+  // An optional descriptor for this region.
+  llvm::StringRef Description;
+  // Instructions that form this region.
+  InstVec Instructions;
+  // Source location range.
+  llvm::SMLoc RangeStart;
+  llvm::SMLoc RangeEnd;
+
+  // Regions cannot be copied.
+  CodeRegion(const CodeRegion &) = delete;
+  CodeRegion &operator=(const CodeRegion &) = delete;
+
+public:
+  /// Creates a region described by \p Desc that starts at \p Start; the end
+  /// location is a default-constructed SMLoc until setEndLocation() is called.
+  CodeRegion(llvm::StringRef Desc, llvm::SMLoc Start)
+      : Description(Desc), RangeStart(Start), RangeEnd() {}
+
+  llvm::StringRef getDescription() const { return Description; }
+
+  llvm::SMLoc startLoc() const { return RangeStart; }
+  llvm::SMLoc endLoc() const { return RangeEnd; }
+  void setEndLocation(llvm::SMLoc End) { RangeEnd = End; }
+
+  /// Returns true if \p Loc is within the source range of this region.
+  bool isLocInRange(llvm::SMLoc Loc) const;
+
+  /// Takes ownership of \p Instruction and appends it to this region.
+  void addInstruction(std::unique_ptr<const llvm::MCInst> Instruction) {
+    Instructions.push_back(std::move(Instruction));
+  }
+
+  /// Returns true if no instruction has been added to this region.
+  bool empty() const { return Instructions.empty(); }
+
+  const InstVec &getInstructions() const { return Instructions; }
+};
+
+/// \brief The list of code regions for an input assembly sequence.
+///
+/// The list initially holds a single region named "Default" with no valid
+/// start location; beginRegion() and endRegion() update the list as region
+/// markers are found, and addInstruction() assigns instructions to regions.
+class CodeRegions {
+  using RegionVec = std::vector<std::unique_ptr<CodeRegion>>;
+
+  // A source manager. Used by the tool to generate meaningful warnings.
+  llvm::SourceMgr &SM;
+
+  RegionVec Regions;
+
+  // Construct a new region of code guarded by LLVM-MCA comments.
+  void addRegion(llvm::StringRef Description, llvm::SMLoc Loc) {
+    Regions.push_back(llvm::make_unique<CodeRegion>(Description, Loc));
+  }
+
+  // The region list cannot be copied.
+  CodeRegions(const CodeRegions &) = delete;
+  CodeRegions &operator=(const CodeRegions &) = delete;
+
+public:
+  using iterator = RegionVec::iterator;
+  using const_iterator = RegionVec::const_iterator;
+
+  CodeRegions(llvm::SourceMgr &S) : SM(S) {
+    // Create a default region for the input code sequence.
+    addRegion("Default", llvm::SMLoc());
+  }
+
+  iterator begin() { return Regions.begin(); }
+  iterator end() { return Regions.end(); }
+  const_iterator begin() const { return Regions.cbegin(); }
+  const_iterator end() const { return Regions.cend(); }
+
+  void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc);
+  void endRegion(llvm::SMLoc Loc);
+  void addInstruction(std::unique_ptr<const llvm::MCInst> Instruction);
+
+  /// Returns the instructions of the region at position \p Idx. \p Idx is not
+  /// range-checked here.
+  const std::vector<std::unique_ptr<const llvm::MCInst>> &
+  getInstructionSequence(unsigned Idx) const {
+    return Regions[Idx]->getInstructions();
+  }
+
+  /// Returns true if every region in the list is empty.
+  bool empty() const {
+    for (const std::unique_ptr<CodeRegion> &Region : Regions)
+      if (!Region->empty())
+        return false;
+    return true;
+  }
+};
+
+} // namespace mca
+
+#endif
diff --git a/llvm/tools/llvm-mca/SourceMgr.h b/llvm/tools/llvm-mca/SourceMgr.h
index ec8abaf..d769570 100644
--- a/llvm/tools/llvm-mca/SourceMgr.h
+++ b/llvm/tools/llvm-mca/SourceMgr.h
@@ -25,21 +25,20 @@
class SourceMgr {
using InstVec = std::vector<std::unique_ptr<const llvm::MCInst>>;
- InstVec Sequence;
+ const InstVec &Sequence;
unsigned Current;
unsigned Iterations;
static const unsigned DefaultIterations = 70;
public:
- SourceMgr(unsigned NumIterations)
- : Current(0),
+ SourceMgr(const InstVec &MCInstSequence, unsigned NumIterations)
+ : Sequence(MCInstSequence), Current(0),
Iterations(NumIterations ? NumIterations : DefaultIterations) {}
unsigned getCurrentIteration() const { return Current / Sequence.size(); }
unsigned getNumIterations() const { return Iterations; }
unsigned size() const { return Sequence.size(); }
const InstVec &getSequence() const { return Sequence; }
- InstVec &getSequence() { return Sequence; }
bool hasNext() { return Current < (Iterations * size()); }
void updateNext() { Current++; }
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
index 91bcb90..3f9682d 100644
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -23,6 +23,7 @@
#include "BackendPrinter.h"
#include "BackendStatistics.h"
+#include "CodeRegion.h"
#include "InstructionInfoView.h"
#include "InstructionTables.h"
#include "RegisterFileStatistics.h"
@@ -158,6 +159,44 @@
return TheTarget;
}
+/// A comment consumer that looks for llvm-mca region markers in assembly
+/// comments and forwards them to a mca::CodeRegions object.
+///
+/// After leading spaces and tabs are skipped:
+///  - a comment starting with "LLVM-MCA-END" ends the current region;
+///  - a comment starting with "LLVM-MCA-BEGIN" begins a new region, and the
+///    rest of the comment (minus leading spaces and tabs) is used as the
+///    region description.
+/// Any other comment is ignored.
+class MCACommentConsumer : public AsmCommentConsumer {
+public:
+  mca::CodeRegions &Regions;
+
+  MCACommentConsumer(mca::CodeRegions &R) : Regions(R) {}
+
+  /// Inspects \p CommentText, found at \p Loc, and updates Regions if the
+  /// comment is a region marker.
+  void HandleComment(SMLoc Loc, StringRef CommentText) override {
+    // Skip empty comments.
+    StringRef Comment(CommentText);
+    if (Comment.empty())
+      return;
+
+    // Skip spaces and tabs. The position is kept as size_t so that the
+    // "not found" result of find_first_not_of is not truncated.
+    size_t Position = Comment.find_first_not_of(" \t");
+    if (Position >= Comment.size())
+      // we reached the end of the comment. Bail out.
+      return;
+
+    Comment = Comment.drop_front(Position);
+    if (Comment.consume_front("LLVM-MCA-END")) {
+      Regions.endRegion(Loc);
+      return;
+    }
+
+    // Now try to parse string LLVM-MCA-BEGIN
+    if (!Comment.consume_front("LLVM-MCA-BEGIN"))
+      return;
+
+    // Skip spaces and tabs. drop_front() returns the shortened string instead
+    // of modifying Comment in place, so the result must be assigned back;
+    // otherwise the description keeps its leading whitespace.
+    Position = Comment.find_first_not_of(" \t");
+    if (Position < Comment.size())
+      Comment = Comment.drop_front(Position);
+    // Use the rest of the string as a descriptor for this code snippet.
+    Regions.beginRegion(Comment, Loc);
+  }
+};
+
int AssembleInput(const char *ProgName, MCAsmParser &Parser,
const Target *TheTarget, MCSubtargetInfo &STI,
MCInstrInfo &MCII, MCTargetOptions &MCOptions) {
@@ -186,17 +225,16 @@
}
class MCStreamerWrapper final : public MCStreamer {
- using InstVec = std::vector<std::unique_ptr<const MCInst>>;
- InstVec &Insts;
+ mca::CodeRegions &Regions;
public:
- MCStreamerWrapper(MCContext &Context, InstVec &Vec)
- : MCStreamer(Context), Insts(Vec) {}
+ MCStreamerWrapper(MCContext &Context, mca::CodeRegions &R)
+ : MCStreamer(Context), Regions(R) {}
// We only want to intercept the emission of new instructions.
virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
bool /* unused */) override {
- Insts.emplace_back(new MCInst(Inst));
+ Regions.addInstruction(llvm::make_unique<const MCInst>(Inst));
}
bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override {
@@ -213,7 +251,10 @@
void EmitCOFFSymbolType(int Type) override {}
void EndCOFFSymbolDef() override {}
- const InstVec &GetInstructionSequence() const { return Insts; }
+ const std::vector<std::unique_ptr<const MCInst>> &
+ GetInstructionSequence(unsigned Index) const {
+ return Regions.getInstructionSequence(Index);
+ }
};
} // end of anonymous namespace
@@ -272,9 +313,8 @@
std::unique_ptr<buffer_ostream> BOS;
- std::unique_ptr<mca::SourceMgr> S = llvm::make_unique<mca::SourceMgr>(
- PrintInstructionTables ? 1 : Iterations);
- MCStreamerWrapper Str(Ctx, S->getSequence());
+ mca::CodeRegions Regions(SrcMgr);
+ MCStreamerWrapper Str(Ctx, Regions);
std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
std::unique_ptr<MCSubtargetInfo> STI(
@@ -310,10 +350,14 @@
}
std::unique_ptr<MCAsmParser> P(createMCAsmParser(SrcMgr, Ctx, Str, *MAI));
+ MCAsmLexer &Lexer = P->getLexer();
+ MCACommentConsumer CC(Regions);
+ Lexer.setCommentConsumer(&CC);
+
if (AssembleInput(ProgName, *P, TheTarget, *STI, *MCII, MCOptions))
return 1;
- if (S->isEmpty()) {
+ if (Regions.empty()) {
errs() << "error: no assembly instructions found.\n";
return 1;
}
@@ -336,49 +380,68 @@
// Create an instruction builder.
mca::InstrBuilder IB(*STI, *MCII);
- if (PrintInstructionTables) {
- mca::InstructionTables IT(STI->getSchedModel(), IB, *S);
+ // Number each region in the sequence.
+ unsigned RegionIdx = 0;
+ for (const std::unique_ptr<mca::CodeRegion> &Region : Regions) {
+ // Skip empty code regions.
+ if (Region->empty())
+ continue;
- if (PrintInstructionInfoView) {
- IT.addView(
- llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, *S, *IP));
+ // Don't print the header of this region if it is the default region, and
+ // it doesn't have an end location.
+ if (Region->startLoc().isValid() || Region->endLoc().isValid()) {
+ TOF->os() << "\n[" << RegionIdx++ << "] Code Region";
+ StringRef Desc = Region->getDescription();
+ if (!Desc.empty())
+ TOF->os() << " - " << Desc;
+ TOF->os() << "\n\n";
}
- IT.addView(llvm::make_unique<mca::ResourcePressureView>(*STI, *IP, *S));
- IT.run();
- IT.printReport(TOF->os());
- TOF->keep();
- return 0;
+ mca::SourceMgr S(Region->getInstructions(),
+ PrintInstructionTables ? 1 : Iterations);
+
+ if (PrintInstructionTables) {
+ mca::InstructionTables IT(STI->getSchedModel(), IB, S);
+
+ if (PrintInstructionInfoView) {
+ IT.addView(
+ llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, S, *IP));
+ }
+
+ IT.addView(llvm::make_unique<mca::ResourcePressureView>(*STI, *IP, S));
+ IT.run();
+ IT.printReport(TOF->os());
+ continue;
+ }
+
+ mca::Backend B(*STI, *MRI, IB, S, Width, RegisterFileSize, LoadQueueSize,
+ StoreQueueSize, AssumeNoAlias);
+ mca::BackendPrinter Printer(B);
+
+ Printer.addView(llvm::make_unique<mca::SummaryView>(S, Width));
+ if (PrintInstructionInfoView)
+ Printer.addView(
+ llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, S, *IP));
+
+ if (PrintModeVerbose)
+ Printer.addView(llvm::make_unique<mca::BackendStatistics>(*STI));
+
+ if (PrintRegisterFileStats)
+ Printer.addView(llvm::make_unique<mca::RegisterFileStatistics>(*STI));
+
+ if (PrintResourcePressureView)
+ Printer.addView(
+ llvm::make_unique<mca::ResourcePressureView>(*STI, *IP, S));
+
+ if (PrintTimelineView) {
+ Printer.addView(llvm::make_unique<mca::TimelineView>(
+ *STI, *IP, S, TimelineMaxIterations, TimelineMaxCycles));
+ }
+
+ B.run();
+ Printer.printReport(TOF->os());
}
- mca::Backend B(*STI, *MRI, IB, *S, Width, RegisterFileSize, LoadQueueSize,
- StoreQueueSize, AssumeNoAlias);
- mca::BackendPrinter Printer(B);
-
- Printer.addView(llvm::make_unique<mca::SummaryView>(*S, Width));
-
- if (PrintInstructionInfoView)
- Printer.addView(
- llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, *S, *IP));
-
- if (PrintModeVerbose)
- Printer.addView(llvm::make_unique<mca::BackendStatistics>(*STI));
-
- if (PrintRegisterFileStats)
- Printer.addView(llvm::make_unique<mca::RegisterFileStatistics>(*STI));
-
- if (PrintResourcePressureView)
- Printer.addView(
- llvm::make_unique<mca::ResourcePressureView>(*STI, *IP, *S));
-
- if (PrintTimelineView) {
- Printer.addView(llvm::make_unique<mca::TimelineView>(
- *STI, *IP, *S, TimelineMaxIterations, TimelineMaxCycles));
- }
-
- B.run();
- Printer.printReport(TOF->os());
TOF->keep();
-
return 0;
}