blob: 594c48bbdba9becd5f776638341e51dabe470af5 [file] [log] [blame]
Clement Courbet44b4c542018-06-19 11:28:59 +00001//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#include "../Target.h"
10
Clement Courbet4860b982018-06-26 08:49:30 +000011#include "../Latency.h"
12#include "../Uops.h"
Clement Courbet717c9762018-06-28 07:41:16 +000013#include "MCTargetDesc/X86BaseInfo.h"
Clement Courbeta51efc22018-06-25 13:12:02 +000014#include "MCTargetDesc/X86MCTargetDesc.h"
Clement Courbet6fd00e32018-06-20 11:54:35 +000015#include "X86.h"
Clement Courbeta51efc22018-06-25 13:12:02 +000016#include "X86RegisterInfo.h"
17#include "llvm/MC/MCInstBuilder.h"
Clement Courbet6fd00e32018-06-20 11:54:35 +000018
Clement Courbet44b4c542018-06-19 11:28:59 +000019namespace exegesis {
20
21namespace {
22
Clement Courbet717c9762018-06-28 07:41:16 +000023// Common code for X86 Uops and Latency runners.
24template <typename Impl> class X86BenchmarkRunner : public Impl {
25 using Impl::Impl;
Clement Courbet4860b982018-06-26 08:49:30 +000026
27 llvm::Expected<SnippetPrototype>
28 generatePrototype(unsigned Opcode) const override {
Clement Courbet717c9762018-06-28 07:41:16 +000029 // Test whether we can generate a snippet for this instruction.
30 const auto &InstrInfo = this->State.getInstrInfo();
31 const auto OpcodeName = InstrInfo.getName(Opcode);
32 if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
33 OpcodeName.startswith("ADJCALLSTACK")) {
34 return llvm::make_error<BenchmarkFailure>(
35 "Unsupported opcode: Push/Pop/AdjCallStack");
Clement Courbet4860b982018-06-26 08:49:30 +000036 }
Clement Courbet717c9762018-06-28 07:41:16 +000037
38 // Handle X87.
39 const auto &InstrDesc = InstrInfo.get(Opcode);
40 const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask;
41 const Instruction Instr(InstrDesc, this->RATC);
42 switch (FPInstClass) {
43 case llvm::X86II::NotFP:
44 break;
45 case llvm::X86II::ZeroArgFP:
46 return Impl::handleZeroArgFP(Instr);
47 case llvm::X86II::OneArgFP:
48 return Impl::handleOneArgFP(Instr); // fstp ST(0)
49 case llvm::X86II::OneArgFPRW:
50 case llvm::X86II::TwoArgFP: {
51 // These are instructions like
52 // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
53 // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
54 // They are intrinsically serial and do not modify the state of the stack.
55 // We generate the same code for latency and uops.
56 return this->generateSelfAliasingPrototype(Instr);
57 }
58 case llvm::X86II::CompareFP:
59 return Impl::handleCompareFP(Instr);
60 case llvm::X86II::CondMovFP:
61 return Impl::handleCondMovFP(Instr);
62 case llvm::X86II::SpecialFP:
63 return Impl::handleSpecialFP(Instr);
64 default:
65 llvm_unreachable("Unknown FP Type!");
66 }
67
68 // Fallback to generic implementation.
69 return Impl::Base::generatePrototype(Opcode);
Clement Courbet4860b982018-06-26 08:49:30 +000070 }
71};
72
Clement Courbet717c9762018-06-28 07:41:16 +000073class X86LatencyImpl : public LatencyBenchmarkRunner {
74protected:
75 using Base = LatencyBenchmarkRunner;
76 using Base::Base;
Clement Courbet4860b982018-06-26 08:49:30 +000077 llvm::Expected<SnippetPrototype>
Clement Courbet717c9762018-06-28 07:41:16 +000078 handleZeroArgFP(const Instruction &Instr) const {
79 return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
80 }
81 llvm::Expected<SnippetPrototype>
82 handleOneArgFP(const Instruction &Instr) const {
83 return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
84 }
85 llvm::Expected<SnippetPrototype>
86 handleCompareFP(const Instruction &Instr) const {
87 return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
88 }
89 llvm::Expected<SnippetPrototype>
90 handleCondMovFP(const Instruction &Instr) const {
91 return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
92 }
93 llvm::Expected<SnippetPrototype>
94 handleSpecialFP(const Instruction &Instr) const {
95 return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
96 }
97};
98
99class X86UopsImpl : public UopsBenchmarkRunner {
100protected:
101 using Base = UopsBenchmarkRunner;
102 using Base::Base;
103 llvm::Expected<SnippetPrototype>
104 handleZeroArgFP(const Instruction &Instr) const {
105 return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
106 }
107 llvm::Expected<SnippetPrototype>
108 handleOneArgFP(const Instruction &Instr) const {
109 return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
110 }
111 llvm::Expected<SnippetPrototype>
112 handleCompareFP(const Instruction &Instr) const {
113 return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
114 }
115 llvm::Expected<SnippetPrototype>
116 handleCondMovFP(const Instruction &Instr) const {
117 return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
118 }
119 llvm::Expected<SnippetPrototype>
120 handleSpecialFP(const Instruction &Instr) const {
121 return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
Clement Courbet4860b982018-06-26 08:49:30 +0000122 }
123};
124
Clement Courbet44b4c542018-06-19 11:28:59 +0000125class ExegesisX86Target : public ExegesisTarget {
Clement Courbet6fd00e32018-06-20 11:54:35 +0000126 void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
127 // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
128 // FIXME: Enable when the exegesis assembler no longer does
129 // Properties.reset(TracksLiveness);
Clement Courbet717c9762018-06-28 07:41:16 +0000130 PM.add(llvm::createX86FloatingPointStackifierPass());
Clement Courbet6fd00e32018-06-20 11:54:35 +0000131 }
132
Clement Courbeta51efc22018-06-25 13:12:02 +0000133 std::vector<llvm::MCInst>
Clement Courbet650db332018-06-26 10:52:41 +0000134 setRegToConstant(unsigned Reg) const override {
Clement Courbeta5334922018-07-02 06:39:55 +0000135 if (llvm::X86::GR8RegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000136 return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)};
Clement Courbeta5334922018-07-02 06:39:55 +0000137 if (llvm::X86::GR16RegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000138 return {llvm::MCInstBuilder(llvm::X86::MOV16ri).addReg(Reg).addImm(1)};
Clement Courbeta5334922018-07-02 06:39:55 +0000139 if (llvm::X86::GR32RegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000140 return {llvm::MCInstBuilder(llvm::X86::MOV32ri).addReg(Reg).addImm(1)};
Clement Courbeta5334922018-07-02 06:39:55 +0000141 if (llvm::X86::GR64RegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000142 return {llvm::MCInstBuilder(llvm::X86::MOV64ri32).addReg(Reg).addImm(1)};
Clement Courbeta5334922018-07-02 06:39:55 +0000143 if (llvm::X86::VR128XRegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000144 return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQUrm);
Clement Courbeta5334922018-07-02 06:39:55 +0000145 if (llvm::X86::VR256XRegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000146 return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQUYrm);
Clement Courbeta5334922018-07-02 06:39:55 +0000147 if (llvm::X86::VR512RegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000148 return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU64Zrm);
Clement Courbet717c9762018-06-28 07:41:16 +0000149 if (llvm::X86::RFP32RegClass.contains(Reg) ||
150 llvm::X86::RFP64RegClass.contains(Reg) ||
Clement Courbeta5334922018-07-02 06:39:55 +0000151 llvm::X86::RFP80RegClass.contains(Reg))
Clement Courbet717c9762018-06-28 07:41:16 +0000152 return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
Clement Courbeta51efc22018-06-25 13:12:02 +0000153 return {};
154 }
155
Clement Courbet4860b982018-06-26 08:49:30 +0000156 std::unique_ptr<BenchmarkRunner>
157 createLatencyBenchmarkRunner(const LLVMState &State) const override {
Clement Courbet717c9762018-06-28 07:41:16 +0000158 return llvm::make_unique<X86BenchmarkRunner<X86LatencyImpl>>(
159 State);
Clement Courbet4860b982018-06-26 08:49:30 +0000160 }
161
162 std::unique_ptr<BenchmarkRunner>
163 createUopsBenchmarkRunner(const LLVMState &State) const override {
Clement Courbet717c9762018-06-28 07:41:16 +0000164 return llvm::make_unique<X86BenchmarkRunner<X86UopsImpl>>(State);
Clement Courbet4860b982018-06-26 08:49:30 +0000165 }
166
Clement Courbet44b4c542018-06-19 11:28:59 +0000167 bool matchesArch(llvm::Triple::ArchType Arch) const override {
168 return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
169 }
Clement Courbeta51efc22018-06-25 13:12:02 +0000170
171private:
172 // setRegToConstant() specialized for a vector register of size
173 // `RegSizeBytes`. `RMOpcode` is the opcode used to do a memory -> vector
174 // register load.
175 static std::vector<llvm::MCInst>
176 setVectorRegToConstant(const unsigned Reg, const unsigned RegSizeBytes,
177 const unsigned RMOpcode) {
178 // There is no instruction to directly set XMM, go through memory.
179 // Since vector values can be interpreted as integers of various sizes (8
180 // to 64 bits) as well as floats and double, so we chose an immediate
181 // value that has set bits for all byte values and is a normal float/
182 // double. 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when
183 // interpreted as a float.
184 constexpr const uint64_t kImmValue = 0x40404040ull;
185 std::vector<llvm::MCInst> Result;
186 // Allocate scratch memory on the stack.
187 Result.push_back(llvm::MCInstBuilder(llvm::X86::SUB64ri8)
188 .addReg(llvm::X86::RSP)
189 .addReg(llvm::X86::RSP)
190 .addImm(RegSizeBytes));
191 // Fill scratch memory.
192 for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += 4) {
193 Result.push_back(llvm::MCInstBuilder(llvm::X86::MOV32mi)
194 // Address = ESP
195 .addReg(llvm::X86::RSP) // BaseReg
196 .addImm(1) // ScaleAmt
197 .addReg(0) // IndexReg
198 .addImm(Disp) // Disp
199 .addReg(0) // Segment
200 // Immediate.
201 .addImm(kImmValue));
202 }
203 // Load Reg from scratch memory.
204 Result.push_back(llvm::MCInstBuilder(RMOpcode)
205 .addReg(Reg)
206 // Address = ESP
207 .addReg(llvm::X86::RSP) // BaseReg
208 .addImm(1) // ScaleAmt
209 .addReg(0) // IndexReg
210 .addImm(0) // Disp
211 .addReg(0)); // Segment
212 // Release scratch memory.
213 Result.push_back(llvm::MCInstBuilder(llvm::X86::ADD64ri8)
214 .addReg(llvm::X86::RSP)
215 .addReg(llvm::X86::RSP)
216 .addImm(RegSizeBytes));
217 return Result;
218 }
Clement Courbet44b4c542018-06-19 11:28:59 +0000219};
220
221} // namespace
222
Clement Courbetcff2caa2018-06-25 11:22:23 +0000223static ExegesisTarget *getTheExegesisX86Target() {
Clement Courbet44b4c542018-06-19 11:28:59 +0000224 static ExegesisX86Target Target;
225 return &Target;
226}
227
228void InitializeX86ExegesisTarget() {
229 ExegesisTarget::registerTarget(getTheExegesisX86Target());
230}
231
Clement Courbetcff2caa2018-06-25 11:22:23 +0000232} // namespace exegesis