[llvm-exegesis] Allow benchmarking arbitrary code snippets.

Summary:

This is a step towards fixing PR38048.

Note that right now the measurements are given per instruction. We'll
need to give measurements per code snippet and update the analysis (PR38731).

Reviewers: gchatelet

Subscribers: tschuett, llvm-commits

Differential Revision: https://reviews.llvm.org/D52041

llvm-svn: 342947
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index c5e4464..d338be2 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -22,11 +22,17 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
 #include <algorithm>
@@ -43,6 +49,10 @@
                llvm::cl::init(""));
 
 static llvm::cl::opt<std::string>
+    SnippetsFile("snippets-file", llvm::cl::desc("code snippets to measure"),
+                 llvm::cl::init(""));
+
+static llvm::cl::opt<std::string>
     BenchmarkFile("benchmarks-file", llvm::cl::desc(""), llvm::cl::init(""));
 
 static llvm::cl::opt<exegesis::InstructionBenchmark::ModeE> BenchmarkMode(
@@ -91,10 +101,19 @@
 void LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET();
 #endif
 
-static unsigned GetOpcodeOrDie(const llvm::MCInstrInfo &MCInstrInfo) {
-  if (OpcodeName.empty() && (OpcodeIndex == 0))
+// Checks that exactly one of OpcodeName, OpcodeIndex or SnippetsFile is
+// provided, and returns the opcode index or 0 if snippets should be read from
+// `SnippetsFile`.
+static unsigned getOpcodeOrDie(const llvm::MCInstrInfo &MCInstrInfo) {
+  const size_t NumSetFlags = (OpcodeName.empty() ? 0 : 1) +
+                             (OpcodeIndex == 0 ? 0 : 1) +
+                             (SnippetsFile.empty() ? 0 : 1);
+  if (NumSetFlags != 1)
     llvm::report_fatal_error(
-        "please provide one and only one of 'opcode-index' or 'opcode-name'");
+        "please provide one and only one of 'opcode-index', 'opcode-name' or "
+        "'snippets-file'");
+  if (!SnippetsFile.empty())
+    return 0;
   if (OpcodeIndex > 0)
     return OpcodeIndex;
   // Resolve opcode name -> opcode.
@@ -120,13 +139,12 @@
 }
 
 // Generates code snippets for opcode `Opcode`.
-llvm::Expected<std::vector<BenchmarkCode>>
+static llvm::Expected<std::vector<BenchmarkCode>>
 generateSnippets(const LLVMState &State, unsigned Opcode) {
   const std::unique_ptr<SnippetGenerator> Generator =
       State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State);
-  if (!Generator) {
+  if (!Generator)
     llvm::report_fatal_error("cannot create snippet generator");
-  }
 
   const llvm::MCInstrDesc &InstrDesc = State.getInstrInfo().get(Opcode);
   // Ignore instructions that we cannot run.
@@ -142,31 +160,178 @@
   return Generator->generateConfigurations(Opcode);
 }
 
+namespace {
+
+// An MCStreamer that reads a BenchmarkCode definition from an asm file: the
+// instructions are collected, and `LLVM-EXEGESIS-DEFREG <reg> <hex_value>` /
+// `LLVM-EXEGESIS-LIVEIN <reg>` comments specify which registers should be
+// defined or are live on entry. Malformed comments are reported and counted.
+class BenchmarkCodeStreamer : public llvm::MCStreamer,
+                              public llvm::AsmCommentConsumer {
+public:
+  explicit BenchmarkCodeStreamer(llvm::MCContext *Context,
+                                 const llvm::MCRegisterInfo *TheRegInfo,
+                                 BenchmarkCode *Result)
+      : llvm::MCStreamer(*Context), RegInfo(TheRegInfo), Result(Result) {}
+
+  // Implementation of the llvm::MCStreamer interface. We only care about
+  // instructions, which are appended to `Result->Instructions`.
+  void EmitInstruction(const llvm::MCInst &instruction,
+                       const llvm::MCSubtargetInfo &mc_subtarget_info,
+                       bool PrintSchedInfo) override {
+    Result->Instructions.push_back(instruction);
+  }
+
+  // Implementation of the llvm::AsmCommentConsumer.
+  void HandleComment(llvm::SMLoc Loc, llvm::StringRef CommentText) override {
+    CommentText = CommentText.trim();
+    if (!CommentText.consume_front("LLVM-EXEGESIS-"))
+      return;
+    if (CommentText.consume_front("DEFREG")) {
+      // LLVM-EXEGESIS-DEFREG <reg> <hex_value>
+      RegisterValue RegVal;
+      llvm::SmallVector<llvm::StringRef, 2> Parts;
+      CommentText.split(Parts, ' ', /*unlimited splits*/ -1,
+                        /*do not keep empty strings*/ false);
+      if (Parts.size() != 2) {
+        llvm::errs() << "invalid comment 'LLVM-EXEGESIS-DEFREG " << CommentText
+                     << "'\n";
+        ++InvalidComments;
+        return; // `Parts` must hold exactly <reg> and <hex_value> below.
+      }
+      if (!(RegVal.Register = findRegisterByName(Parts[0].trim()))) {
+        llvm::errs() << "unknown register in 'LLVM-EXEGESIS-DEFREG "
+                     << CommentText << "'\n";
+        ++InvalidComments;
+        return;
+      }
+      const llvm::StringRef HexValue = Parts[1].trim();
+      RegVal.Value = llvm::APInt(
+          /* each hex digit is 4 bits */ HexValue.size() * 4, HexValue, 16);
+      Result->RegisterInitialValues.push_back(std::move(RegVal));
+      return;
+    }
+    if (!CommentText.consume_front("LIVEIN"))
+      return;
+    // LLVM-EXEGESIS-LIVEIN <reg>
+    if (unsigned Reg = findRegisterByName(CommentText.ltrim())) {
+      Result->LiveIns.push_back(Reg);
+      return;
+    }
+    llvm::errs() << "unknown register in 'LLVM-EXEGESIS-LIVEIN " << CommentText
+                 << "'\n";
+    ++InvalidComments;
+  }
+
+  unsigned numInvalidComments() const { return InvalidComments; }
+
+private:
+  // We only care about instructions, we don't implement this part of the API.
+  void EmitCommonSymbol(llvm::MCSymbol *symbol, uint64_t size,
+                        unsigned byte_alignment) override {}
+  bool EmitSymbolAttribute(llvm::MCSymbol *symbol,
+                           llvm::MCSymbolAttr attribute) override {
+    return false;
+  }
+  void EmitValueToAlignment(unsigned byte_alignment, int64_t value,
+                            unsigned value_size,
+                            unsigned max_bytes_to_emit) override {}
+  void EmitZerofill(llvm::MCSection *section, llvm::MCSymbol *symbol,
+                    uint64_t size, unsigned byte_alignment,
+                    llvm::SMLoc Loc) override {}
+
+  unsigned findRegisterByName(const llvm::StringRef RegName) const {
+    // Linear scan; returns 0 when nothing matches. FIXME: Can we do better ?
+    for (unsigned I = 0, E = RegInfo->getNumRegs(); I < E; ++I)
+      if (RegName == RegInfo->getName(I))
+        return I;
+    llvm::errs() << "'" << RegName
+                 << "' is not a valid register name for the target\n";
+    return 0;
+  }
+
+  const llvm::MCRegisterInfo *const RegInfo;
+  BenchmarkCode *const Result;
+  unsigned InvalidComments = 0;
+};
+
+} // namespace
+
+// Reads the asm file `Filename` and returns the snippet it contains as a
+// single-element vector, or a BenchmarkFailure if reading or parsing fails.
+static llvm::Expected<std::vector<BenchmarkCode>>
+readSnippets(const LLVMState &State, llvm::StringRef Filename) {
+  auto BufferOrErr = llvm::MemoryBuffer::getFileOrSTDIN(Filename);
+  if (std::error_code EC = BufferOrErr.getError())
+    return llvm::make_error<BenchmarkFailure>(
+        "cannot read snippet: " + Filename + ": " + EC.message());
+  llvm::SourceMgr SrcMgr;
+  SrcMgr.AddNewSourceBuffer(std::move(BufferOrErr.get()), llvm::SMLoc());
+
+  BenchmarkCode Snippet;
+
+  // Set up the MC layer: the streamer fills `Snippet` as the file is parsed.
+  const llvm::TargetMachine &Machine = State.getTargetMachine();
+  const llvm::MCRegisterInfo *const RegInfo = Machine.getMCRegisterInfo();
+  llvm::MCObjectFileInfo ObjFileInfo;
+  llvm::MCContext Ctx(Machine.getMCAsmInfo(), RegInfo, &ObjFileInfo);
+  ObjFileInfo.InitMCObjectFileInfo(Machine.getTargetTriple(), /*PIC*/ false,
+                                   Ctx);
+  BenchmarkCodeStreamer CodeStreamer(&Ctx, RegInfo, &Snippet);
+
+  // Generic parser; the streamer is also registered as its comment consumer.
+  const std::unique_ptr<llvm::MCAsmParser> Parser(llvm::createMCAsmParser(
+      SrcMgr, Ctx, CodeStreamer, *Machine.getMCAsmInfo()));
+  if (!Parser)
+    return llvm::make_error<BenchmarkFailure>("cannot create asm parser");
+  Parser->getLexer().setCommentConsumer(&CodeStreamer);
+
+  const std::unique_ptr<llvm::MCTargetAsmParser> TargetParser(
+      Machine.getTarget().createMCAsmParser(
+          *Machine.getMCSubtargetInfo(), *Parser, *Machine.getMCInstrInfo(),
+          llvm::MCTargetOptions()));
+  if (!TargetParser)
+    return llvm::make_error<BenchmarkFailure>(
+        "cannot create target asm parser");
+  Parser->setTargetParser(*TargetParser);
+
+  if (Parser->Run(false))
+    return llvm::make_error<BenchmarkFailure>("cannot parse asm file");
+  if (const unsigned NumInvalid = CodeStreamer.numInvalidComments())
+    return llvm::make_error<BenchmarkFailure>(
+        llvm::Twine("found ") + llvm::Twine(NumInvalid) +
+        " invalid LLVM-EXEGESIS comments");
+  return std::vector<BenchmarkCode>{std::move(Snippet)};
+}
+
 void benchmarkMain() {
   if (exegesis::pfm::pfmInitialize())
     llvm::report_fatal_error("cannot initialize libpfm");
 
   llvm::InitializeNativeTarget();
   llvm::InitializeNativeTargetAsmPrinter();
+  llvm::InitializeNativeTargetAsmParser();
 #ifdef LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET
   LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET();
 #endif
 
   const LLVMState State;
-  const auto Opcode = GetOpcodeOrDie(State.getInstrInfo());
+  const auto Opcode = getOpcodeOrDie(State.getInstrInfo());
 
-  // Ignore instructions without a sched class if -ignore-invalid-sched-class is
-  // passed.
-  if (IgnoreInvalidSchedClass &&
-      State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
-    llvm::errs() << "ignoring instruction without sched class\n";
-    return;
+  std::vector<BenchmarkCode> Configurations;
+  if (Opcode > 0) {
+    // Ignore instructions without a sched class if -ignore-invalid-sched-class
+    // is passed.
+    if (IgnoreInvalidSchedClass &&
+        State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
+      llvm::errs() << "ignoring instruction without sched class\n";
+      return;
+    }
+    Configurations = ExitOnErr(generateSnippets(State, Opcode));
+  } else {
+    Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
   }
 
-  // FIXME: Allow arbitrary code.
-  const std::vector<BenchmarkCode> Configurations =
-      ExitOnErr(generateSnippets(State, Opcode));
-
   const std::unique_ptr<BenchmarkRunner> Runner =
       State.getExegesisTarget().createBenchmarkRunner(BenchmarkMode, State);
   if (!Runner) {