//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9#include "../Target.h"
10
Clement Courbet4860b982018-06-26 08:49:30 +000011#include "../Latency.h"
12#include "../Uops.h"
Clement Courbet717c9762018-06-28 07:41:16 +000013#include "MCTargetDesc/X86BaseInfo.h"
Clement Courbeta51efc22018-06-25 13:12:02 +000014#include "MCTargetDesc/X86MCTargetDesc.h"
Clement Courbet6fd00e32018-06-20 11:54:35 +000015#include "X86.h"
Clement Courbeta51efc22018-06-25 13:12:02 +000016#include "X86RegisterInfo.h"
Clement Courbete7851692018-07-03 06:17:05 +000017#include "X86Subtarget.h"
Clement Courbeta51efc22018-06-25 13:12:02 +000018#include "llvm/MC/MCInstBuilder.h"
Clement Courbet6fd00e32018-06-20 11:54:35 +000019
Clement Courbet44b4c542018-06-19 11:28:59 +000020namespace exegesis {
21
22namespace {
23
Clement Courbet717c9762018-06-28 07:41:16 +000024// Common code for X86 Uops and Latency runners.
Clement Courbetd939f6d2018-09-13 07:40:53 +000025template <typename Impl> class X86SnippetGenerator : public Impl {
Clement Courbet717c9762018-06-28 07:41:16 +000026 using Impl::Impl;
Clement Courbet4860b982018-06-26 08:49:30 +000027
Guillaume Chatelete60866a2018-08-03 09:29:38 +000028 llvm::Expected<CodeTemplate>
29 generateCodeTemplate(unsigned Opcode) const override {
Clement Courbet717c9762018-06-28 07:41:16 +000030 // Test whether we can generate a snippet for this instruction.
31 const auto &InstrInfo = this->State.getInstrInfo();
32 const auto OpcodeName = InstrInfo.getName(Opcode);
33 if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
34 OpcodeName.startswith("ADJCALLSTACK")) {
35 return llvm::make_error<BenchmarkFailure>(
36 "Unsupported opcode: Push/Pop/AdjCallStack");
Clement Courbet4860b982018-06-26 08:49:30 +000037 }
Clement Courbet717c9762018-06-28 07:41:16 +000038
39 // Handle X87.
40 const auto &InstrDesc = InstrInfo.get(Opcode);
41 const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask;
42 const Instruction Instr(InstrDesc, this->RATC);
43 switch (FPInstClass) {
44 case llvm::X86II::NotFP:
45 break;
46 case llvm::X86II::ZeroArgFP:
Clement Courbetf9a0bb32018-07-05 13:54:51 +000047 return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
Clement Courbet717c9762018-06-28 07:41:16 +000048 case llvm::X86II::OneArgFP:
Clement Courbetf9a0bb32018-07-05 13:54:51 +000049 return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
Clement Courbet717c9762018-06-28 07:41:16 +000050 case llvm::X86II::OneArgFPRW:
51 case llvm::X86II::TwoArgFP: {
52 // These are instructions like
53 // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
54 // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
55 // They are intrinsically serial and do not modify the state of the stack.
56 // We generate the same code for latency and uops.
Guillaume Chatelete60866a2018-08-03 09:29:38 +000057 return this->generateSelfAliasingCodeTemplate(Instr);
Clement Courbet717c9762018-06-28 07:41:16 +000058 }
59 case llvm::X86II::CompareFP:
60 return Impl::handleCompareFP(Instr);
61 case llvm::X86II::CondMovFP:
62 return Impl::handleCondMovFP(Instr);
63 case llvm::X86II::SpecialFP:
Clement Courbetf9a0bb32018-07-05 13:54:51 +000064 return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
Clement Courbet717c9762018-06-28 07:41:16 +000065 default:
66 llvm_unreachable("Unknown FP Type!");
67 }
68
69 // Fallback to generic implementation.
Guillaume Chatelete60866a2018-08-03 09:29:38 +000070 return Impl::Base::generateCodeTemplate(Opcode);
Clement Courbet4860b982018-06-26 08:49:30 +000071 }
72};
73
Clement Courbetd939f6d2018-09-13 07:40:53 +000074class X86LatencyImpl : public LatencySnippetGenerator {
Clement Courbet717c9762018-06-28 07:41:16 +000075protected:
Clement Courbetd939f6d2018-09-13 07:40:53 +000076 using Base = LatencySnippetGenerator;
Clement Courbet717c9762018-06-28 07:41:16 +000077 using Base::Base;
Guillaume Chatelete60866a2018-08-03 09:29:38 +000078 llvm::Expected<CodeTemplate> handleCompareFP(const Instruction &Instr) const {
Clement Courbetd939f6d2018-09-13 07:40:53 +000079 return llvm::make_error<SnippetGeneratorFailure>(
80 "Unsupported x87 CompareFP");
Clement Courbet717c9762018-06-28 07:41:16 +000081 }
Guillaume Chatelete60866a2018-08-03 09:29:38 +000082 llvm::Expected<CodeTemplate> handleCondMovFP(const Instruction &Instr) const {
Clement Courbetd939f6d2018-09-13 07:40:53 +000083 return llvm::make_error<SnippetGeneratorFailure>(
84 "Unsupported x87 CondMovFP");
Clement Courbet717c9762018-06-28 07:41:16 +000085 }
Clement Courbet717c9762018-06-28 07:41:16 +000086};
87
Clement Courbetd939f6d2018-09-13 07:40:53 +000088class X86UopsImpl : public UopsSnippetGenerator {
Clement Courbet717c9762018-06-28 07:41:16 +000089protected:
Clement Courbetd939f6d2018-09-13 07:40:53 +000090 using Base = UopsSnippetGenerator;
Clement Courbet717c9762018-06-28 07:41:16 +000091 using Base::Base;
Clement Courbetf9a0bb32018-07-05 13:54:51 +000092 // We can compute uops for any FP instruction that does not grow or shrink the
93 // stack (either do not touch the stack or push as much as they pop).
Guillaume Chatelete60866a2018-08-03 09:29:38 +000094 llvm::Expected<CodeTemplate> handleCompareFP(const Instruction &Instr) const {
95 return generateUnconstrainedCodeTemplate(
Clement Courbetf9a0bb32018-07-05 13:54:51 +000096 Instr, "instruction does not grow/shrink the FP stack");
Clement Courbet717c9762018-06-28 07:41:16 +000097 }
Guillaume Chatelete60866a2018-08-03 09:29:38 +000098 llvm::Expected<CodeTemplate> handleCondMovFP(const Instruction &Instr) const {
99 return generateUnconstrainedCodeTemplate(
Clement Courbetf9a0bb32018-07-05 13:54:51 +0000100 Instr, "instruction does not grow/shrink the FP stack");
Clement Courbet4860b982018-06-26 08:49:30 +0000101 }
102};
103
Clement Courbet44b4c542018-06-19 11:28:59 +0000104class ExegesisX86Target : public ExegesisTarget {
Clement Courbet6fd00e32018-06-20 11:54:35 +0000105 void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
106 // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
Clement Courbet717c9762018-06-28 07:41:16 +0000107 PM.add(llvm::createX86FloatingPointStackifierPass());
Clement Courbet6fd00e32018-06-20 11:54:35 +0000108 }
109
Guillaume Chateletfb943542018-08-01 14:41:45 +0000110 unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override {
111 if (!TT.isArch64Bit()) {
112 // FIXME: This would require popping from the stack, so we would have to
113 // add some additional setup code.
114 return 0;
115 }
116 return TT.isOSWindows() ? llvm::X86::RCX : llvm::X86::RDI;
117 }
118
119 unsigned getMaxMemoryAccessSize() const override { return 64; }
120
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000121 void fillMemoryOperands(InstructionBuilder &IB, unsigned Reg,
Guillaume Chateletfb943542018-08-01 14:41:45 +0000122 unsigned Offset) const override {
123 // FIXME: For instructions that read AND write to memory, we use the same
124 // value for input and output.
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000125 for (size_t I = 0, E = IB.Instr.Operands.size(); I < E; ++I) {
126 const Operand *Op = &IB.Instr.Operands[I];
Guillaume Chateletfb943542018-08-01 14:41:45 +0000127 if (Op->IsExplicit && Op->IsMem) {
128 // Case 1: 5-op memory.
129 assert((I + 5 <= E) && "x86 memory references are always 5 ops");
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000130 IB.getValueFor(*Op) = llvm::MCOperand::createReg(Reg); // BaseReg
131 Op = &IB.Instr.Operands[++I];
Guillaume Chateletfb943542018-08-01 14:41:45 +0000132 assert(Op->IsMem);
133 assert(Op->IsExplicit);
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000134 IB.getValueFor(*Op) = llvm::MCOperand::createImm(1); // ScaleAmt
135 Op = &IB.Instr.Operands[++I];
Guillaume Chateletfb943542018-08-01 14:41:45 +0000136 assert(Op->IsMem);
137 assert(Op->IsExplicit);
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000138 IB.getValueFor(*Op) = llvm::MCOperand::createReg(0); // IndexReg
139 Op = &IB.Instr.Operands[++I];
Guillaume Chateletfb943542018-08-01 14:41:45 +0000140 assert(Op->IsMem);
141 assert(Op->IsExplicit);
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000142 IB.getValueFor(*Op) = llvm::MCOperand::createImm(Offset); // Disp
143 Op = &IB.Instr.Operands[++I];
Guillaume Chateletfb943542018-08-01 14:41:45 +0000144 assert(Op->IsMem);
145 assert(Op->IsExplicit);
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000146 IB.getValueFor(*Op) = llvm::MCOperand::createReg(0); // Segment
Guillaume Chateletfb943542018-08-01 14:41:45 +0000147 // Case2: segment:index addressing. We assume that ES is 0.
148 }
149 }
150 }
151
Clement Courbete7851692018-07-03 06:17:05 +0000152 std::vector<llvm::MCInst> setRegToConstant(const llvm::MCSubtargetInfo &STI,
153 unsigned Reg) const override {
154 // GPR.
Clement Courbeta5334922018-07-02 06:39:55 +0000155 if (llvm::X86::GR8RegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000156 return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)};
Clement Courbeta5334922018-07-02 06:39:55 +0000157 if (llvm::X86::GR16RegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000158 return {llvm::MCInstBuilder(llvm::X86::MOV16ri).addReg(Reg).addImm(1)};
Clement Courbeta5334922018-07-02 06:39:55 +0000159 if (llvm::X86::GR32RegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000160 return {llvm::MCInstBuilder(llvm::X86::MOV32ri).addReg(Reg).addImm(1)};
Clement Courbeta5334922018-07-02 06:39:55 +0000161 if (llvm::X86::GR64RegClass.contains(Reg))
Clement Courbeta51efc22018-06-25 13:12:02 +0000162 return {llvm::MCInstBuilder(llvm::X86::MOV64ri32).addReg(Reg).addImm(1)};
Clement Courbete7851692018-07-03 06:17:05 +0000163 // MMX.
164 if (llvm::X86::VR64RegClass.contains(Reg))
165 return setVectorRegToConstant(Reg, 8, llvm::X86::MMX_MOVQ64rm);
166 // {X,Y,Z}MM.
167 if (llvm::X86::VR128XRegClass.contains(Reg)) {
168 if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
169 return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQU32Z128rm);
170 if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
171 return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQUrm);
172 return setVectorRegToConstant(Reg, 16, llvm::X86::MOVDQUrm);
173 }
174 if (llvm::X86::VR256XRegClass.contains(Reg)) {
175 if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
176 return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQU32Z256rm);
Clement Courbeta51efc22018-06-25 13:12:02 +0000177 return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQUYrm);
Clement Courbete7851692018-07-03 06:17:05 +0000178 }
Clement Courbeta5334922018-07-02 06:39:55 +0000179 if (llvm::X86::VR512RegClass.contains(Reg))
Clement Courbete7851692018-07-03 06:17:05 +0000180 return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU32Zrm);
181 // X87.
Clement Courbet717c9762018-06-28 07:41:16 +0000182 if (llvm::X86::RFP32RegClass.contains(Reg) ||
183 llvm::X86::RFP64RegClass.contains(Reg) ||
Clement Courbeta5334922018-07-02 06:39:55 +0000184 llvm::X86::RFP80RegClass.contains(Reg))
Clement Courbet717c9762018-06-28 07:41:16 +0000185 return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
Clement Courbetf9a0bb32018-07-05 13:54:51 +0000186 if (Reg == llvm::X86::EFLAGS) {
187 // Set all flags to 0 but the bits that are "reserved and set to 1".
188 constexpr const uint32_t kImmValue = 0x00007002u;
189 std::vector<llvm::MCInst> Result;
190 Result.push_back(allocateStackSpace(8));
191 Result.push_back(fillStackSpace(llvm::X86::MOV64mi32, 0, kImmValue));
192 Result.push_back(llvm::MCInstBuilder(llvm::X86::POPF64)); // Also pops.
193 return Result;
194 }
Clement Courbeta51efc22018-06-25 13:12:02 +0000195 return {};
196 }
197
Clement Courbetd939f6d2018-09-13 07:40:53 +0000198 std::unique_ptr<SnippetGenerator>
199 createLatencySnippetGenerator(const LLVMState &State) const override {
200 return llvm::make_unique<X86SnippetGenerator<X86LatencyImpl>>(State);
Clement Courbet4860b982018-06-26 08:49:30 +0000201 }
202
Clement Courbetd939f6d2018-09-13 07:40:53 +0000203 std::unique_ptr<SnippetGenerator>
204 createUopsSnippetGenerator(const LLVMState &State) const override {
205 return llvm::make_unique<X86SnippetGenerator<X86UopsImpl>>(State);
Clement Courbet4860b982018-06-26 08:49:30 +0000206 }
207
Clement Courbet44b4c542018-06-19 11:28:59 +0000208 bool matchesArch(llvm::Triple::ArchType Arch) const override {
209 return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
210 }
Clement Courbeta51efc22018-06-25 13:12:02 +0000211
212private:
213 // setRegToConstant() specialized for a vector register of size
214 // `RegSizeBytes`. `RMOpcode` is the opcode used to do a memory -> vector
215 // register load.
216 static std::vector<llvm::MCInst>
217 setVectorRegToConstant(const unsigned Reg, const unsigned RegSizeBytes,
218 const unsigned RMOpcode) {
219 // There is no instruction to directly set XMM, go through memory.
220 // Since vector values can be interpreted as integers of various sizes (8
221 // to 64 bits) as well as floats and double, so we chose an immediate
222 // value that has set bits for all byte values and is a normal float/
223 // double. 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when
224 // interpreted as a float.
Clement Courbetf9a0bb32018-07-05 13:54:51 +0000225 constexpr const uint32_t kImmValue = 0x40404040u;
Clement Courbeta51efc22018-06-25 13:12:02 +0000226 std::vector<llvm::MCInst> Result;
Clement Courbetf9a0bb32018-07-05 13:54:51 +0000227 Result.push_back(allocateStackSpace(RegSizeBytes));
228 constexpr const unsigned kMov32NumBytes = 4;
229 for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += kMov32NumBytes) {
230 Result.push_back(fillStackSpace(llvm::X86::MOV32mi, Disp, kImmValue));
Clement Courbeta51efc22018-06-25 13:12:02 +0000231 }
Clement Courbetf9a0bb32018-07-05 13:54:51 +0000232 Result.push_back(loadToReg(Reg, RMOpcode));
233 Result.push_back(releaseStackSpace(RegSizeBytes));
Clement Courbeta51efc22018-06-25 13:12:02 +0000234 return Result;
235 }
Clement Courbetf9a0bb32018-07-05 13:54:51 +0000236
237 // Allocates scratch memory on the stack.
238 static llvm::MCInst allocateStackSpace(unsigned Bytes) {
239 return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
240 .addReg(llvm::X86::RSP)
241 .addReg(llvm::X86::RSP)
242 .addImm(Bytes);
243 }
244
245 // Fills scratch memory at offset `OffsetBytes` with value `Imm`.
246 static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
247 uint64_t Imm) {
248 return llvm::MCInstBuilder(MovOpcode)
249 // Address = ESP
250 .addReg(llvm::X86::RSP) // BaseReg
251 .addImm(1) // ScaleAmt
252 .addReg(0) // IndexReg
253 .addImm(OffsetBytes) // Disp
254 .addReg(0) // Segment
255 // Immediate.
256 .addImm(Imm);
257 }
258
259 // Loads scratch memory into register `Reg` using opcode `RMOpcode`.
260 static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
261 return llvm::MCInstBuilder(RMOpcode)
262 .addReg(Reg)
263 // Address = ESP
264 .addReg(llvm::X86::RSP) // BaseReg
265 .addImm(1) // ScaleAmt
266 .addReg(0) // IndexReg
267 .addImm(0) // Disp
268 .addReg(0); // Segment
269 }
270
271 // Releases scratch memory.
272 static llvm::MCInst releaseStackSpace(unsigned Bytes) {
273 return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
274 .addReg(llvm::X86::RSP)
275 .addReg(llvm::X86::RSP)
276 .addImm(Bytes);
277 }
Clement Courbet44b4c542018-06-19 11:28:59 +0000278};
279
280} // namespace
281
Clement Courbetcff2caa2018-06-25 11:22:23 +0000282static ExegesisTarget *getTheExegesisX86Target() {
Clement Courbet44b4c542018-06-19 11:28:59 +0000283 static ExegesisX86Target Target;
284 return &Target;
285}
286
// Registers the x86 target with the exegesis target registry. Called once at
// tool startup.
void InitializeX86ExegesisTarget() {
  ExegesisTarget::registerTarget(getTheExegesisX86Target());
}
290
Clement Courbetcff2caa2018-06-25 11:22:23 +0000291} // namespace exegesis