[llvm-exegesis] Refactor how forbidden registers are computed.
Summary:
Right now latency generation can incorrectly select the scratch register
as a dependency-carrying register.
- Move the logic for preventing scratch-register selection from the Uops
implementation to the common SnippetGenerator class.
- Aliasing detection now takes a set of forbidden registers just like
random register assignment does.
Reviewers: gchatelet
Subscribers: tschuett, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68084
llvm-svn: 373048
diff --git a/llvm/tools/llvm-exegesis/lib/Latency.cpp b/llvm/tools/llvm-exegesis/lib/Latency.cpp
index 2f3fbae..112d3ac 100644
--- a/llvm/tools/llvm-exegesis/lib/Latency.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Latency.cpp
@@ -39,7 +39,8 @@
static std::vector<Instruction>
computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
- size_t MaxAliasingInstructions) {
+ size_t MaxAliasingInstructions,
+ const BitVector &ForbiddenRegisters) {
// Randomly iterate the set of instructions.
std::vector<unsigned> Opcodes;
Opcodes.resize(State.getInstrInfo().getNumOpcodes());
@@ -53,15 +54,16 @@
const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode);
if (OtherInstr.hasMemoryOperands())
continue;
- if (Instr.hasAliasingRegistersThrough(OtherInstr))
- AliasingInstructions.push_back(std::move(OtherInstr));
+ if (Instr.hasAliasingRegistersThrough(OtherInstr, ForbiddenRegisters))
+ AliasingInstructions.push_back(OtherInstr);
if (AliasingInstructions.size() >= MaxAliasingInstructions)
break;
}
return AliasingInstructions;
}
-static ExecutionMode getExecutionModes(const Instruction &Instr) {
+static ExecutionMode getExecutionModes(const Instruction &Instr,
+ const BitVector &ForbiddenRegisters) {
ExecutionMode EM = ExecutionMode::UNKNOWN;
if (Instr.hasAliasingImplicitRegisters())
EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
@@ -70,7 +72,7 @@
if (Instr.hasMemoryOperands())
EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
else {
- if (Instr.hasAliasingRegisters())
+ if (Instr.hasAliasingRegisters(ForbiddenRegisters))
EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
if (Instr.hasOneUseOrOneDef())
EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
@@ -80,6 +82,7 @@
static void appendCodeTemplates(const LLVMState &State,
const Instruction &Instr,
+ const BitVector &ForbiddenRegisters,
ExecutionMode ExecutionModeBit,
llvm::StringRef ExecutionClassDescription,
std::vector<CodeTemplate> &CodeTemplates) {
@@ -122,8 +125,8 @@
}
case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
// Select back-to-back non-memory instruction.
- for (const auto OtherInstr :
- computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) {
+ for (const auto OtherInstr : computeAliasingInstructions(
+ State, Instr, kMaxAliasingInstructions, ForbiddenRegisters)) {
const AliasingConfigurations Forward(Instr, OtherInstr);
const AliasingConfigurations Back(OtherInstr, Instr);
InstructionTemplate ThisIT(Instr);
@@ -149,13 +152,14 @@
LatencySnippetGenerator::~LatencySnippetGenerator() = default;
llvm::Expected<std::vector<CodeTemplate>>
-LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
+LatencySnippetGenerator::generateCodeTemplates(
+ const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
std::vector<CodeTemplate> Results;
- const ExecutionMode EM = getExecutionModes(Instr);
+ const ExecutionMode EM = getExecutionModes(Instr, ForbiddenRegisters);
for (const auto EC : kExecutionClasses) {
for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
- appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description,
- Results);
+ appendCodeTemplates(State, Instr, ForbiddenRegisters, ExecutionModeBit,
+ EC.Description, Results);
if (!Results.empty())
break;
}
diff --git a/llvm/tools/llvm-exegesis/lib/Latency.h b/llvm/tools/llvm-exegesis/lib/Latency.h
index 7d6d96a..9ffe2d7 100644
--- a/llvm/tools/llvm-exegesis/lib/Latency.h
+++ b/llvm/tools/llvm-exegesis/lib/Latency.h
@@ -27,7 +27,8 @@
~LatencySnippetGenerator() override;
llvm::Expected<std::vector<CodeTemplate>>
- generateCodeTemplates(const Instruction &Instr) const override;
+ generateCodeTemplates(const Instruction &Instr,
+ const BitVector &ForbiddenRegisters) const override;
};
class LatencyBenchmarkRunner : public BenchmarkRunner {
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
index 3636a1d..f44dcbd 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
@@ -183,10 +183,29 @@
return ImplDefRegs.anyCommon(ImplUseRegs);
}
+// Returns true if there are registers that are both in `A` and `B` but not in
+// `Forbidden`.
+static bool anyCommonExcludingForbidden(const BitVector &A, const BitVector &B,
+ const BitVector &Forbidden) {
+ assert(A.size() == B.size() && B.size() == Forbidden.size());
+ const auto Size = A.size();
+ for (int AIndex = A.find_first(); AIndex != -1;) {
+ const int BIndex = B.find_first_in(AIndex, Size);
+ if (BIndex == -1)
+ return false;
+ if (AIndex == BIndex && !Forbidden.test(AIndex))
+ return true;
+ AIndex = A.find_first_in(BIndex + 1, Size);
+ }
+ return false;
+}
+
bool Instruction::hasAliasingRegistersThrough(
- const Instruction &OtherInstr) const {
- return AllDefRegs.anyCommon(OtherInstr.AllUseRegs) &&
- OtherInstr.AllDefRegs.anyCommon(AllUseRegs);
+ const Instruction &OtherInstr, const BitVector &ForbiddenRegisters) const {
+ return anyCommonExcludingForbidden(AllDefRegs, OtherInstr.AllUseRegs,
+ ForbiddenRegisters) &&
+ anyCommonExcludingForbidden(OtherInstr.AllDefRegs, AllUseRegs,
+ ForbiddenRegisters);
}
bool Instruction::hasTiedRegisters() const {
@@ -194,8 +213,10 @@
Variables, [](const Variable &Var) { return Var.hasTiedOperands(); });
}
-bool Instruction::hasAliasingRegisters() const {
- return AllDefRegs.anyCommon(AllUseRegs);
+bool Instruction::hasAliasingRegisters(
+ const BitVector &ForbiddenRegisters) const {
+ return anyCommonExcludingForbidden(AllDefRegs, AllUseRegs,
+ ForbiddenRegisters);
}
bool Instruction::hasOneUseOrOneDef() const {
@@ -203,6 +224,7 @@
}
void Instruction::dump(const llvm::MCRegisterInfo &RegInfo,
+ const RegisterAliasingTrackerCache &RATC,
llvm::raw_ostream &Stream) const {
Stream << "- " << Name << "\n";
for (const auto &Op : Operands) {
@@ -251,7 +273,7 @@
Stream << "- hasAliasingImplicitRegisters (execution is always serial)\n";
if (hasTiedRegisters())
Stream << "- hasTiedRegisters (execution is always serial)\n";
- if (hasAliasingRegisters())
+ if (hasAliasingRegisters(RATC.emptyRegisters()))
Stream << "- hasAliasingRegisters\n";
}
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
index 09bd1ce..1c67ef8 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
@@ -112,10 +112,11 @@
// Repeating this instruction may execute sequentially by picking aliasing
// Use and Def registers. It may also execute in parallel by picking non
// aliasing Use and Def registers.
- bool hasAliasingRegisters() const;
+ bool hasAliasingRegisters(const BitVector &ForbiddenRegisters) const;
// Whether this instruction's registers alias with OtherInstr's registers.
- bool hasAliasingRegistersThrough(const Instruction &OtherInstr) const;
+ bool hasAliasingRegistersThrough(const Instruction &OtherInstr,
+ const BitVector &ForbiddenRegisters) const;
// Returns whether this instruction has Memory Operands.
// Repeating this instruction executes sequentially with an instruction that
@@ -129,6 +130,7 @@
// Convenient function to help with debugging.
void dump(const llvm::MCRegisterInfo &RegInfo,
+ const RegisterAliasingTrackerCache &RATC,
llvm::raw_ostream &Stream) const;
const llvm::MCInstrDesc *Description; // Never nullptr.
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
index d5c790d..cd93589 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
@@ -38,14 +38,33 @@
llvm::Expected<std::vector<BenchmarkCode>>
SnippetGenerator::generateConfigurations(const Instruction &Instr) const {
- if (auto E = generateCodeTemplates(Instr)) {
- const auto &RATC = State.getRATC();
+ llvm::BitVector ForbiddenRegs = State.getRATC().reservedRegisters();
+
  // If the instruction has memory operands, prevent the generator from
  // using the scratch register and its aliasing registers.
+ if (Instr.hasMemoryOperands()) {
+ const auto &ET = State.getExegesisTarget();
+ unsigned ScratchSpacePointerInReg =
+ ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
+ if (ScratchSpacePointerInReg == 0)
+ return llvm::make_error<BenchmarkFailure>(
+ "Infeasible : target does not support memory instructions");
+ const auto &ScratchRegAliases =
+ State.getRATC().getRegister(ScratchSpacePointerInReg).aliasedBits();
    // If the instruction implicitly writes to ScratchSpacePointerInReg, abort.
+ // FIXME: We could make a copy of the scratch register.
+ for (const auto &Op : Instr.Operands) {
+ if (Op.isDef() && Op.isImplicitReg() &&
+ ScratchRegAliases.test(Op.getImplicitReg()))
+ return llvm::make_error<BenchmarkFailure>(
+ "Infeasible : memory instruction uses scratch memory register");
+ }
+ ForbiddenRegs |= ScratchRegAliases;
+ }
+
+ if (auto E = generateCodeTemplates(Instr, ForbiddenRegs)) {
std::vector<BenchmarkCode> Output;
for (CodeTemplate &CT : E.get()) {
- const llvm::BitVector &ForbiddenRegs =
- CT.ScratchSpacePointerInReg
- ? RATC.getRegister(CT.ScratchSpacePointerInReg).aliasedBits()
- : RATC.emptyRegisters();
// TODO: Generate as many BenchmarkCode as needed.
{
BenchmarkCode BC;
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
index ef44bd9..966da2a 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
@@ -69,7 +69,8 @@
private:
// API to be implemented by subclasses.
virtual llvm::Expected<std::vector<CodeTemplate>>
- generateCodeTemplates(const Instruction &Instr) const = 0;
+ generateCodeTemplates(const Instruction &Instr,
+ const BitVector &ForbiddenRegisters) const = 0;
};
// A global Random Number Generator to randomize configurations.
diff --git a/llvm/tools/llvm-exegesis/lib/Uops.cpp b/llvm/tools/llvm-exegesis/lib/Uops.cpp
index 8fc8fd2..6823ec8 100644
--- a/llvm/tools/llvm-exegesis/lib/Uops.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Uops.cpp
@@ -126,7 +126,7 @@
static std::vector<InstructionTemplate> generateSnippetUsingStaticRenaming(
const LLVMState &State, const InstructionTemplate &IT,
const ArrayRef<const Variable *> TiedVariables,
- const BitVector *ScratchSpaceAliasedRegs) {
+ const BitVector &ForbiddenRegisters) {
std::vector<InstructionTemplate> Instructions;
// Assign registers to variables in a round-robin manner. This is simple but
// ensures that the most register-constrained variable does not get starved.
@@ -135,12 +135,8 @@
assert(Var);
const Operand &Op = IT.Instr.getPrimaryOperand(*Var);
assert(Op.isReg());
- BitVector PossibleRegs = State.getRATC().emptyRegisters();
- if (ScratchSpaceAliasedRegs) {
- PossibleRegs |= *ScratchSpaceAliasedRegs;
- }
- PossibleRegs.flip();
- PossibleRegs &= Op.getRegisterAliasing().sourceBits();
+ BitVector PossibleRegs = Op.getRegisterAliasing().sourceBits();
+ remove(PossibleRegs, ForbiddenRegisters);
PossibleRegsForVar.push_back(std::move(PossibleRegs));
}
SmallVector<int, 2> Iterators(TiedVariables.size(), 0);
@@ -168,28 +164,14 @@
}
llvm::Expected<std::vector<CodeTemplate>>
-UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
+UopsSnippetGenerator::generateCodeTemplates(
+ const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
CodeTemplate CT;
- const llvm::BitVector *ScratchSpaceAliasedRegs = nullptr;
- if (Instr.hasMemoryOperands()) {
- const auto &ET = State.getExegesisTarget();
- CT.ScratchSpacePointerInReg =
- ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
- if (CT.ScratchSpacePointerInReg == 0)
- return llvm::make_error<BenchmarkFailure>(
- "Infeasible : target does not support memory instructions");
- ScratchSpaceAliasedRegs =
- &State.getRATC().getRegister(CT.ScratchSpacePointerInReg).aliasedBits();
- // If the instruction implicitly writes to ScratchSpacePointerInReg , abort.
- // FIXME: We could make a copy of the scratch register.
- for (const auto &Op : Instr.Operands) {
- if (Op.isDef() && Op.isImplicitReg() &&
- ScratchSpaceAliasedRegs->test(Op.getImplicitReg()))
- return llvm::make_error<BenchmarkFailure>(
- "Infeasible : memory instruction uses scratch memory register");
- }
- }
-
+ CT.ScratchSpacePointerInReg =
+ Instr.hasMemoryOperands()
+ ? State.getExegesisTarget().getScratchMemoryRegister(
+ State.getTargetMachine().getTargetTriple())
+ : 0;
const AliasingConfigurations SelfAliasing(Instr, Instr);
InstructionTemplate IT(Instr);
if (SelfAliasing.empty()) {
@@ -208,20 +190,17 @@
if (!TiedVariables.empty()) {
CT.Info = "instruction has tied variables, using static renaming.";
CT.Instructions = generateSnippetUsingStaticRenaming(
- State, IT, TiedVariables, ScratchSpaceAliasedRegs);
+ State, IT, TiedVariables, ForbiddenRegisters);
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
return getSingleton(std::move(CT));
}
- const auto &ReservedRegisters = State.getRATC().reservedRegisters();
// No tied variables, we pick random values for defs.
llvm::BitVector Defs(State.getRegInfo().getNumRegs());
for (const auto &Op : Instr.Operands) {
if (Op.isReg() && Op.isExplicit() && Op.isDef() && !Op.isMemory()) {
auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
- remove(PossibleRegisters, ReservedRegisters);
- // Do not use the scratch memory address register.
- if (ScratchSpaceAliasedRegs)
- remove(PossibleRegisters, *ScratchSpaceAliasedRegs);
+ // Do not use forbidden registers.
+ remove(PossibleRegisters, ForbiddenRegisters);
assert(PossibleRegisters.any() && "No register left to choose from");
const auto RandomReg = randomBit(PossibleRegisters);
Defs.set(RandomReg);
@@ -233,10 +212,7 @@
for (const auto &Op : Instr.Operands) {
if (Op.isReg() && Op.isExplicit() && Op.isUse() && !Op.isMemory()) {
auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
- remove(PossibleRegisters, ReservedRegisters);
- // Do not use the scratch memory address register.
- if (ScratchSpaceAliasedRegs)
- remove(PossibleRegisters, *ScratchSpaceAliasedRegs);
+ remove(PossibleRegisters, ForbiddenRegisters);
remove(PossibleRegisters, DefAliases);
assert(PossibleRegisters.any() && "No register left to choose from");
const auto RandomReg = randomBit(PossibleRegisters);
diff --git a/llvm/tools/llvm-exegesis/lib/Uops.h b/llvm/tools/llvm-exegesis/lib/Uops.h
index 077c9a8..23caff2 100644
--- a/llvm/tools/llvm-exegesis/lib/Uops.h
+++ b/llvm/tools/llvm-exegesis/lib/Uops.h
@@ -26,7 +26,8 @@
~UopsSnippetGenerator() override;
llvm::Expected<std::vector<CodeTemplate>>
- generateCodeTemplates(const Instruction &Instr) const override;
+ generateCodeTemplates(const Instruction &Instr,
+ const BitVector &ForbiddenRegisters) const override;
static constexpr const size_t kMinNumDifferentAddresses = 6;
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index cebcb1c..45e3611 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -182,19 +182,21 @@
using LatencySnippetGenerator::LatencySnippetGenerator;
llvm::Expected<std::vector<CodeTemplate>>
- generateCodeTemplates(const Instruction &Instr) const override;
+ generateCodeTemplates(const Instruction &Instr,
+ const BitVector &ForbiddenRegisters) const override;
};
} // namespace
llvm::Expected<std::vector<CodeTemplate>>
X86LatencySnippetGenerator::generateCodeTemplates(
- const Instruction &Instr) const {
+ const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
if (auto E = IsInvalidOpcode(Instr))
return std::move(E);
switch (getX86FPFlags(Instr)) {
case llvm::X86II::NotFP:
- return LatencySnippetGenerator::generateCodeTemplates(Instr);
+ return LatencySnippetGenerator::generateCodeTemplates(Instr,
+ ForbiddenRegisters);
case llvm::X86II::ZeroArgFP:
case llvm::X86II::OneArgFP:
case llvm::X86II::SpecialFP:
@@ -219,19 +221,21 @@
using UopsSnippetGenerator::UopsSnippetGenerator;
llvm::Expected<std::vector<CodeTemplate>>
- generateCodeTemplates(const Instruction &Instr) const override;
+ generateCodeTemplates(const Instruction &Instr,
+ const BitVector &ForbiddenRegisters) const override;
};
} // namespace
llvm::Expected<std::vector<CodeTemplate>>
X86UopsSnippetGenerator::generateCodeTemplates(
- const Instruction &Instr) const {
+ const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
if (auto E = IsInvalidOpcode(Instr))
return std::move(E);
switch (getX86FPFlags(Instr)) {
case llvm::X86II::NotFP:
- return UopsSnippetGenerator::generateCodeTemplates(Instr);
+ return UopsSnippetGenerator::generateCodeTemplates(Instr,
+ ForbiddenRegisters);
case llvm::X86II::ZeroArgFP:
case llvm::X86II::OneArgFP:
case llvm::X86II::SpecialFP: