blob: f0b411cd3c43020e3dce977347b36eeb35b7a608 [file] [log] [blame]
//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9#include "../Target.h"
10
Clement Courbet4860b982018-06-26 08:49:30 +000011#include "../Latency.h"
12#include "../Uops.h"
Clement Courbet717c9762018-06-28 07:41:16 +000013#include "MCTargetDesc/X86BaseInfo.h"
Clement Courbeta51efc22018-06-25 13:12:02 +000014#include "MCTargetDesc/X86MCTargetDesc.h"
Clement Courbet6fd00e32018-06-20 11:54:35 +000015#include "X86.h"
Clement Courbeta51efc22018-06-25 13:12:02 +000016#include "X86RegisterInfo.h"
Clement Courbete7851692018-07-03 06:17:05 +000017#include "X86Subtarget.h"
Clement Courbeta51efc22018-06-25 13:12:02 +000018#include "llvm/MC/MCInstBuilder.h"
Clement Courbet6fd00e32018-06-20 11:54:35 +000019
Clement Courbet44b4c542018-06-19 11:28:59 +000020namespace exegesis {
21
22namespace {
23
Clement Courbet717c9762018-06-28 07:41:16 +000024// Common code for X86 Uops and Latency runners.
25template <typename Impl> class X86BenchmarkRunner : public Impl {
26 using Impl::Impl;
Clement Courbet4860b982018-06-26 08:49:30 +000027
28 llvm::Expected<SnippetPrototype>
29 generatePrototype(unsigned Opcode) const override {
Clement Courbet717c9762018-06-28 07:41:16 +000030 // Test whether we can generate a snippet for this instruction.
31 const auto &InstrInfo = this->State.getInstrInfo();
32 const auto OpcodeName = InstrInfo.getName(Opcode);
33 if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
34 OpcodeName.startswith("ADJCALLSTACK")) {
35 return llvm::make_error<BenchmarkFailure>(
36 "Unsupported opcode: Push/Pop/AdjCallStack");
Clement Courbet4860b982018-06-26 08:49:30 +000037 }
Clement Courbet717c9762018-06-28 07:41:16 +000038
39 // Handle X87.
40 const auto &InstrDesc = InstrInfo.get(Opcode);
41 const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask;
42 const Instruction Instr(InstrDesc, this->RATC);
43 switch (FPInstClass) {
44 case llvm::X86II::NotFP:
45 break;
46 case llvm::X86II::ZeroArgFP:
47 return Impl::handleZeroArgFP(Instr);
48 case llvm::X86II::OneArgFP:
49 return Impl::handleOneArgFP(Instr); // fstp ST(0)
50 case llvm::X86II::OneArgFPRW:
51 case llvm::X86II::TwoArgFP: {
52 // These are instructions like
53 // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
54 // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
55 // They are intrinsically serial and do not modify the state of the stack.
56 // We generate the same code for latency and uops.
57 return this->generateSelfAliasingPrototype(Instr);
58 }
59 case llvm::X86II::CompareFP:
60 return Impl::handleCompareFP(Instr);
61 case llvm::X86II::CondMovFP:
62 return Impl::handleCondMovFP(Instr);
63 case llvm::X86II::SpecialFP:
64 return Impl::handleSpecialFP(Instr);
65 default:
66 llvm_unreachable("Unknown FP Type!");
67 }
68
69 // Fallback to generic implementation.
70 return Impl::Base::generatePrototype(Opcode);
Clement Courbet4860b982018-06-26 08:49:30 +000071 }
72};
73
Clement Courbet717c9762018-06-28 07:41:16 +000074class X86LatencyImpl : public LatencyBenchmarkRunner {
75protected:
76 using Base = LatencyBenchmarkRunner;
77 using Base::Base;
Clement Courbet4860b982018-06-26 08:49:30 +000078 llvm::Expected<SnippetPrototype>
Clement Courbet717c9762018-06-28 07:41:16 +000079 handleZeroArgFP(const Instruction &Instr) const {
80 return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
81 }
82 llvm::Expected<SnippetPrototype>
83 handleOneArgFP(const Instruction &Instr) const {
84 return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
85 }
86 llvm::Expected<SnippetPrototype>
87 handleCompareFP(const Instruction &Instr) const {
88 return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
89 }
90 llvm::Expected<SnippetPrototype>
91 handleCondMovFP(const Instruction &Instr) const {
92 return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
93 }
94 llvm::Expected<SnippetPrototype>
95 handleSpecialFP(const Instruction &Instr) const {
96 return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
97 }
98};
99
100class X86UopsImpl : public UopsBenchmarkRunner {
101protected:
102 using Base = UopsBenchmarkRunner;
103 using Base::Base;
104 llvm::Expected<SnippetPrototype>
105 handleZeroArgFP(const Instruction &Instr) const {
106 return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
107 }
108 llvm::Expected<SnippetPrototype>
109 handleOneArgFP(const Instruction &Instr) const {
110 return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
111 }
112 llvm::Expected<SnippetPrototype>
113 handleCompareFP(const Instruction &Instr) const {
114 return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
115 }
116 llvm::Expected<SnippetPrototype>
117 handleCondMovFP(const Instruction &Instr) const {
118 return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
119 }
120 llvm::Expected<SnippetPrototype>
121 handleSpecialFP(const Instruction &Instr) const {
122 return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
Clement Courbet4860b982018-06-26 08:49:30 +0000123 }
124};
125
Clement Courbet44b4c542018-06-19 11:28:59 +0000126class ExegesisX86Target : public ExegesisTarget {
Clement Courbet6fd00e32018-06-20 11:54:35 +0000127 void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
128 // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
129 // FIXME: Enable when the exegesis assembler no longer does
130 // Properties.reset(TracksLiveness);
Clement Courbet717c9762018-06-28 07:41:16 +0000131 PM.add(llvm::createX86FloatingPointStackifierPass());
Clement Courbet6fd00e32018-06-20 11:54:35 +0000132 }
133
Clement Courbete7851692018-07-03 06:17:05 +0000134 std::vector<llvm::MCInst> setRegToConstant(const llvm::MCSubtargetInfo &STI,
135 unsigned Reg) const override {
136 // GPR.
Clement Courbeta5334922018-07-02 06:39:55 +0000137 if (llvm::X86::GR8RegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000138 return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)};
Clement Courbeta5334922018-07-02 06:39:55 +0000139 if (llvm::X86::GR16RegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000140 return {llvm::MCInstBuilder(llvm::X86::MOV16ri).addReg(Reg).addImm(1)};
Clement Courbeta5334922018-07-02 06:39:55 +0000141 if (llvm::X86::GR32RegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000142 return {llvm::MCInstBuilder(llvm::X86::MOV32ri).addReg(Reg).addImm(1)};
Clement Courbeta5334922018-07-02 06:39:55 +0000143 if (llvm::X86::GR64RegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000144 return {llvm::MCInstBuilder(llvm::X86::MOV64ri32).addReg(Reg).addImm(1)};
Clement Courbete7851692018-07-03 06:17:05 +0000145 // MMX.
146 if (llvm::X86::VR64RegClass.contains(Reg))
147 return setVectorRegToConstant(Reg, 8, llvm::X86::MMX_MOVQ64rm);
148 // {X,Y,Z}MM.
149 if (llvm::X86::VR128XRegClass.contains(Reg)) {
150 if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
151 return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQU32Z128rm);
152 if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
153 return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQUrm);
154 return setVectorRegToConstant(Reg, 16, llvm::X86::MOVDQUrm);
155 }
156 if (llvm::X86::VR256XRegClass.contains(Reg)) {
157 if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
158 return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQU32Z256rm);
Clement Courbeta51efc22018-06-25 13:12:02 +0000159 return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQUYrm);
Clement Courbete7851692018-07-03 06:17:05 +0000160 }
Clement Courbeta5334922018-07-02 06:39:55 +0000161 if (llvm::X86::VR512RegClass.contains(Reg))
Clement Courbete7851692018-07-03 06:17:05 +0000162 return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU32Zrm);
163 // X87.
Clement Courbet717c9762018-06-28 07:41:16 +0000164 if (llvm::X86::RFP32RegClass.contains(Reg) ||
165 llvm::X86::RFP64RegClass.contains(Reg) ||
Clement Courbeta5334922018-07-02 06:39:55 +0000166 llvm::X86::RFP80RegClass.contains(Reg))
Clement Courbet717c9762018-06-28 07:41:16 +0000167 return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
Clement Courbeta51efc22018-06-25 13:12:02 +0000168 return {};
169 }
170
Clement Courbet4860b982018-06-26 08:49:30 +0000171 std::unique_ptr<BenchmarkRunner>
172 createLatencyBenchmarkRunner(const LLVMState &State) const override {
Clement Courbete7851692018-07-03 06:17:05 +0000173 return llvm::make_unique<X86BenchmarkRunner<X86LatencyImpl>>(State);
Clement Courbet4860b982018-06-26 08:49:30 +0000174 }
175
176 std::unique_ptr<BenchmarkRunner>
177 createUopsBenchmarkRunner(const LLVMState &State) const override {
Clement Courbet717c9762018-06-28 07:41:16 +0000178 return llvm::make_unique<X86BenchmarkRunner<X86UopsImpl>>(State);
Clement Courbet4860b982018-06-26 08:49:30 +0000179 }
180
Clement Courbet44b4c542018-06-19 11:28:59 +0000181 bool matchesArch(llvm::Triple::ArchType Arch) const override {
182 return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
183 }
Clement Courbeta51efc22018-06-25 13:12:02 +0000184
185private:
186 // setRegToConstant() specialized for a vector register of size
187 // `RegSizeBytes`. `RMOpcode` is the opcode used to do a memory -> vector
188 // register load.
189 static std::vector<llvm::MCInst>
190 setVectorRegToConstant(const unsigned Reg, const unsigned RegSizeBytes,
191 const unsigned RMOpcode) {
192 // There is no instruction to directly set XMM, go through memory.
193 // Since vector values can be interpreted as integers of various sizes (8
194 // to 64 bits) as well as floats and double, so we chose an immediate
195 // value that has set bits for all byte values and is a normal float/
196 // double. 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when
197 // interpreted as a float.
198 constexpr const uint64_t kImmValue = 0x40404040ull;
199 std::vector<llvm::MCInst> Result;
200 // Allocate scratch memory on the stack.
201 Result.push_back(llvm::MCInstBuilder(llvm::X86::SUB64ri8)
202 .addReg(llvm::X86::RSP)
203 .addReg(llvm::X86::RSP)
204 .addImm(RegSizeBytes));
205 // Fill scratch memory.
206 for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += 4) {
207 Result.push_back(llvm::MCInstBuilder(llvm::X86::MOV32mi)
208 // Address = ESP
209 .addReg(llvm::X86::RSP) // BaseReg
210 .addImm(1) // ScaleAmt
211 .addReg(0) // IndexReg
212 .addImm(Disp) // Disp
213 .addReg(0) // Segment
214 // Immediate.
215 .addImm(kImmValue));
216 }
217 // Load Reg from scratch memory.
218 Result.push_back(llvm::MCInstBuilder(RMOpcode)
219 .addReg(Reg)
220 // Address = ESP
221 .addReg(llvm::X86::RSP) // BaseReg
222 .addImm(1) // ScaleAmt
223 .addReg(0) // IndexReg
224 .addImm(0) // Disp
225 .addReg(0)); // Segment
226 // Release scratch memory.
227 Result.push_back(llvm::MCInstBuilder(llvm::X86::ADD64ri8)
228 .addReg(llvm::X86::RSP)
229 .addReg(llvm::X86::RSP)
230 .addImm(RegSizeBytes));
231 return Result;
232 }
Clement Courbet44b4c542018-06-19 11:28:59 +0000233};
234
235} // namespace
236
Clement Courbetcff2caa2018-06-25 11:22:23 +0000237static ExegesisTarget *getTheExegesisX86Target() {
Clement Courbet44b4c542018-06-19 11:28:59 +0000238 static ExegesisX86Target Target;
239 return &Target;
240}
241
242void InitializeX86ExegesisTarget() {
243 ExegesisTarget::registerTarget(getTheExegesisX86Target());
244}
245
Clement Courbetcff2caa2018-06-25 11:22:23 +0000246} // namespace exegesis