blob: ae5c2e8d25e8b3c68dedb18061978fe9968b186d [file] [log] [blame]
Clement Courbet44b4c542018-06-19 11:28:59 +00001//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#include "../Target.h"
10
Clement Courbet4860b982018-06-26 08:49:30 +000011#include "../Latency.h"
12#include "../Uops.h"
Clement Courbet717c9762018-06-28 07:41:16 +000013#include "MCTargetDesc/X86BaseInfo.h"
Clement Courbeta51efc22018-06-25 13:12:02 +000014#include "MCTargetDesc/X86MCTargetDesc.h"
Clement Courbet6fd00e32018-06-20 11:54:35 +000015#include "X86.h"
Clement Courbeta51efc22018-06-25 13:12:02 +000016#include "X86RegisterInfo.h"
Clement Courbete7851692018-07-03 06:17:05 +000017#include "X86Subtarget.h"
Clement Courbeta51efc22018-06-25 13:12:02 +000018#include "llvm/MC/MCInstBuilder.h"
Clement Courbet6fd00e32018-06-20 11:54:35 +000019
Clement Courbet44b4c542018-06-19 11:28:59 +000020namespace exegesis {
21
22namespace {
23
Guillaume Chatelet946fb052018-10-12 15:12:22 +000024static llvm::Error IsInvalidOpcode(const Instruction &Instr) {
25 const auto OpcodeName = Instr.Name;
26 if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
27 OpcodeName.startswith("ADJCALLSTACK"))
28 return llvm::make_error<BenchmarkFailure>(
29 "Unsupported opcode: Push/Pop/AdjCallStack");
30 return llvm::Error::success();
31}
32
33static unsigned GetX86FPFlags(const Instruction &Instr) {
34 return Instr.Description->TSFlags & llvm::X86II::FPTypeMask;
35}
36
37class X86LatencySnippetGenerator : public LatencySnippetGenerator {
38public:
39 using LatencySnippetGenerator::LatencySnippetGenerator;
Clement Courbet4860b982018-06-26 08:49:30 +000040
Guillaume Chatelet296a8622018-10-15 09:09:19 +000041 llvm::Expected<std::vector<CodeTemplate>>
42 generateCodeTemplates(const Instruction &Instr) const override {
Guillaume Chatelet946fb052018-10-12 15:12:22 +000043 if (auto E = IsInvalidOpcode(Instr))
44 return std::move(E);
Clement Courbet717c9762018-06-28 07:41:16 +000045
Guillaume Chatelet946fb052018-10-12 15:12:22 +000046 switch (GetX86FPFlags(Instr)) {
Clement Courbet717c9762018-06-28 07:41:16 +000047 case llvm::X86II::NotFP:
Guillaume Chatelet296a8622018-10-15 09:09:19 +000048 return LatencySnippetGenerator::generateCodeTemplates(Instr);
Clement Courbet717c9762018-06-28 07:41:16 +000049 case llvm::X86II::ZeroArgFP:
Clement Courbet717c9762018-06-28 07:41:16 +000050 case llvm::X86II::OneArgFP:
Guillaume Chatelet946fb052018-10-12 15:12:22 +000051 case llvm::X86II::SpecialFP:
52 case llvm::X86II::CompareFP:
53 case llvm::X86II::CondMovFP:
54 return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
Clement Courbet717c9762018-06-28 07:41:16 +000055 case llvm::X86II::OneArgFPRW:
Guillaume Chatelet946fb052018-10-12 15:12:22 +000056 case llvm::X86II::TwoArgFP:
57 // These are instructions like
58 // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
59 // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
60 // They are intrinsically serial and do not modify the state of the stack.
Guillaume Chatelet296a8622018-10-15 09:09:19 +000061 return generateSelfAliasingCodeTemplates(Instr);
Guillaume Chatelet946fb052018-10-12 15:12:22 +000062 default:
63 llvm_unreachable("Unknown FP Type!");
64 }
65 }
66};
67
68class X86UopsSnippetGenerator : public UopsSnippetGenerator {
69public:
70 using UopsSnippetGenerator::UopsSnippetGenerator;
71
Guillaume Chatelet296a8622018-10-15 09:09:19 +000072 llvm::Expected<std::vector<CodeTemplate>>
73 generateCodeTemplates(const Instruction &Instr) const override {
Guillaume Chatelet946fb052018-10-12 15:12:22 +000074 if (auto E = IsInvalidOpcode(Instr))
75 return std::move(E);
76
77 switch (GetX86FPFlags(Instr)) {
78 case llvm::X86II::NotFP:
Guillaume Chatelet296a8622018-10-15 09:09:19 +000079 return UopsSnippetGenerator::generateCodeTemplates(Instr);
Guillaume Chatelet946fb052018-10-12 15:12:22 +000080 case llvm::X86II::ZeroArgFP:
81 case llvm::X86II::OneArgFP:
82 case llvm::X86II::SpecialFP:
83 return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
84 case llvm::X86II::OneArgFPRW:
85 case llvm::X86II::TwoArgFP:
Clement Courbet717c9762018-06-28 07:41:16 +000086 // These are instructions like
87 // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
88 // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
89 // They are intrinsically serial and do not modify the state of the stack.
90 // We generate the same code for latency and uops.
Guillaume Chatelet296a8622018-10-15 09:09:19 +000091 return generateSelfAliasingCodeTemplates(Instr);
Clement Courbet717c9762018-06-28 07:41:16 +000092 case llvm::X86II::CompareFP:
Clement Courbet717c9762018-06-28 07:41:16 +000093 case llvm::X86II::CondMovFP:
Guillaume Chatelet946fb052018-10-12 15:12:22 +000094 // We can compute uops for any FP instruction that does not grow or shrink
95 // the stack (either do not touch the stack or push as much as they pop).
Guillaume Chatelet296a8622018-10-15 09:09:19 +000096 return generateUnconstrainedCodeTemplates(
Guillaume Chatelet946fb052018-10-12 15:12:22 +000097 Instr, "instruction does not grow/shrink the FP stack");
Clement Courbet717c9762018-06-28 07:41:16 +000098 default:
99 llvm_unreachable("Unknown FP Type!");
100 }
Clement Courbet4860b982018-06-26 08:49:30 +0000101 }
102};
103
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000104static unsigned GetLoadImmediateOpcode(unsigned RegBitWidth) {
105 switch (RegBitWidth) {
Guillaume Chatelet5ad29092018-09-18 11:26:27 +0000106 case 8:
107 return llvm::X86::MOV8ri;
108 case 16:
109 return llvm::X86::MOV16ri;
110 case 32:
111 return llvm::X86::MOV32ri;
112 case 64:
113 return llvm::X86::MOV64ri;
114 }
115 llvm_unreachable("Invalid Value Width");
116}
117
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000118// Generates instruction to load an immediate value into a register.
119static llvm::MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
120 const llvm::APInt &Value) {
121 if (Value.getBitWidth() > RegBitWidth)
122 llvm_unreachable("Value must fit in the Register");
123 return llvm::MCInstBuilder(GetLoadImmediateOpcode(RegBitWidth))
Guillaume Chatelet5ad29092018-09-18 11:26:27 +0000124 .addReg(Reg)
125 .addImm(Value.getZExtValue());
126}
127
128// Allocates scratch memory on the stack.
129static llvm::MCInst allocateStackSpace(unsigned Bytes) {
130 return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
131 .addReg(llvm::X86::RSP)
132 .addReg(llvm::X86::RSP)
133 .addImm(Bytes);
134}
135
136// Fills scratch memory at offset `OffsetBytes` with value `Imm`.
137static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
138 uint64_t Imm) {
139 return llvm::MCInstBuilder(MovOpcode)
140 // Address = ESP
141 .addReg(llvm::X86::RSP) // BaseReg
142 .addImm(1) // ScaleAmt
143 .addReg(0) // IndexReg
144 .addImm(OffsetBytes) // Disp
145 .addReg(0) // Segment
146 // Immediate.
147 .addImm(Imm);
148}
149
150// Loads scratch memory into register `Reg` using opcode `RMOpcode`.
151static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
152 return llvm::MCInstBuilder(RMOpcode)
153 .addReg(Reg)
154 // Address = ESP
155 .addReg(llvm::X86::RSP) // BaseReg
156 .addImm(1) // ScaleAmt
157 .addReg(0) // IndexReg
158 .addImm(0) // Disp
159 .addReg(0); // Segment
160}
161
162// Releases scratch memory.
163static llvm::MCInst releaseStackSpace(unsigned Bytes) {
164 return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
165 .addReg(llvm::X86::RSP)
166 .addReg(llvm::X86::RSP)
167 .addImm(Bytes);
168}
169
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000170// Reserves some space on the stack, fills it with the content of the provided
171// constant and provide methods to load the stack value into a register.
Guillaume Chatelet5ad29092018-09-18 11:26:27 +0000172struct ConstantInliner {
Clement Courbet78b2e732018-09-25 07:31:44 +0000173 explicit ConstantInliner(const llvm::APInt &Constant) : Constant_(Constant) {}
Guillaume Chatelet5ad29092018-09-18 11:26:27 +0000174
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000175 std::vector<llvm::MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth,
176 unsigned Opcode) {
Clement Courbet78b2e732018-09-25 07:31:44 +0000177 assert((RegBitWidth & 7) == 0 &&
178 "RegBitWidth must be a multiple of 8 bits");
179 initStack(RegBitWidth / 8);
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000180 add(loadToReg(Reg, Opcode));
Clement Courbet78b2e732018-09-25 07:31:44 +0000181 add(releaseStackSpace(RegBitWidth / 8));
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000182 return std::move(Instructions);
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000183 }
184
Clement Courbetc51f4522018-10-19 09:56:54 +0000185 std::vector<llvm::MCInst> loadX87STAndFinalize(unsigned Reg) {
186 initStack(kF80Bytes);
187 add(llvm::MCInstBuilder(llvm::X86::LD_F80m)
188 // Address = ESP
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000189 .addReg(llvm::X86::RSP) // BaseReg
190 .addImm(1) // ScaleAmt
191 .addReg(0) // IndexReg
192 .addImm(0) // Disp
193 .addReg(0)); // Segment
194 if (Reg != llvm::X86::ST0)
195 add(llvm::MCInstBuilder(llvm::X86::ST_Frr).addReg(Reg));
Clement Courbetc51f4522018-10-19 09:56:54 +0000196 add(releaseStackSpace(kF80Bytes));
197 return std::move(Instructions);
198 }
199
200 std::vector<llvm::MCInst> loadX87FPAndFinalize(unsigned Reg) {
201 initStack(kF80Bytes);
202 add(llvm::MCInstBuilder(llvm::X86::LD_Fp80m)
203 .addReg(Reg)
204 // Address = ESP
205 .addReg(llvm::X86::RSP) // BaseReg
206 .addImm(1) // ScaleAmt
207 .addReg(0) // IndexReg
208 .addImm(0) // Disp
209 .addReg(0)); // Segment
210 add(releaseStackSpace(kF80Bytes));
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000211 return std::move(Instructions);
212 }
213
214 std::vector<llvm::MCInst> popFlagAndFinalize() {
Clement Courbet78b2e732018-09-25 07:31:44 +0000215 initStack(8);
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000216 add(llvm::MCInstBuilder(llvm::X86::POPF64));
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000217 return std::move(Instructions);
218 }
219
220private:
Clement Courbetc51f4522018-10-19 09:56:54 +0000221 static constexpr const unsigned kF80Bytes = 10; // 80 bits.
222
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000223 ConstantInliner &add(const llvm::MCInst &Inst) {
224 Instructions.push_back(Inst);
225 return *this;
226 }
227
Clement Courbet78b2e732018-09-25 07:31:44 +0000228 void initStack(unsigned Bytes) {
229 assert(Constant_.getBitWidth() <= Bytes * 8 &&
230 "Value does not have the correct size");
231 const llvm::APInt WideConstant = Constant_.getBitWidth() < Bytes * 8
232 ? Constant_.sext(Bytes * 8)
233 : Constant_;
234 add(allocateStackSpace(Bytes));
235 size_t ByteOffset = 0;
236 for (; Bytes - ByteOffset >= 4; ByteOffset += 4)
237 add(fillStackSpace(
238 llvm::X86::MOV32mi, ByteOffset,
239 WideConstant.extractBits(32, ByteOffset * 8).getZExtValue()));
240 if (Bytes - ByteOffset >= 2) {
241 add(fillStackSpace(
242 llvm::X86::MOV16mi, ByteOffset,
243 WideConstant.extractBits(16, ByteOffset * 8).getZExtValue()));
244 ByteOffset += 2;
245 }
246 if (Bytes - ByteOffset >= 1)
247 add(fillStackSpace(
248 llvm::X86::MOV8mi, ByteOffset,
249 WideConstant.extractBits(8, ByteOffset * 8).getZExtValue()));
250 }
251
252 llvm::APInt Constant_;
Guillaume Chatelet5ad29092018-09-18 11:26:27 +0000253 std::vector<llvm::MCInst> Instructions;
254};
255
Clement Courbet44b4c542018-06-19 11:28:59 +0000256class ExegesisX86Target : public ExegesisTarget {
Clement Courbet6fd00e32018-06-20 11:54:35 +0000257 void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
258 // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
Clement Courbet717c9762018-06-28 07:41:16 +0000259 PM.add(llvm::createX86FloatingPointStackifierPass());
Clement Courbet6fd00e32018-06-20 11:54:35 +0000260 }
261
Guillaume Chateletfb943542018-08-01 14:41:45 +0000262 unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override {
263 if (!TT.isArch64Bit()) {
264 // FIXME: This would require popping from the stack, so we would have to
265 // add some additional setup code.
266 return 0;
267 }
268 return TT.isOSWindows() ? llvm::X86::RCX : llvm::X86::RDI;
269 }
270
271 unsigned getMaxMemoryAccessSize() const override { return 64; }
272
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000273 void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
Guillaume Chateletfb943542018-08-01 14:41:45 +0000274 unsigned Offset) const override {
275 // FIXME: For instructions that read AND write to memory, we use the same
276 // value for input and output.
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000277 for (size_t I = 0, E = IT.Instr.Operands.size(); I < E; ++I) {
278 const Operand *Op = &IT.Instr.Operands[I];
Guillaume Chatelet09c28392018-10-09 08:59:10 +0000279 if (Op->isExplicit() && Op->isMemory()) {
Guillaume Chateletfb943542018-08-01 14:41:45 +0000280 // Case 1: 5-op memory.
281 assert((I + 5 <= E) && "x86 memory references are always 5 ops");
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000282 IT.getValueFor(*Op) = llvm::MCOperand::createReg(Reg); // BaseReg
283 Op = &IT.Instr.Operands[++I];
Guillaume Chatelet09c28392018-10-09 08:59:10 +0000284 assert(Op->isMemory());
285 assert(Op->isExplicit());
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000286 IT.getValueFor(*Op) = llvm::MCOperand::createImm(1); // ScaleAmt
287 Op = &IT.Instr.Operands[++I];
Guillaume Chatelet09c28392018-10-09 08:59:10 +0000288 assert(Op->isMemory());
289 assert(Op->isExplicit());
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000290 IT.getValueFor(*Op) = llvm::MCOperand::createReg(0); // IndexReg
291 Op = &IT.Instr.Operands[++I];
Guillaume Chatelet09c28392018-10-09 08:59:10 +0000292 assert(Op->isMemory());
293 assert(Op->isExplicit());
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000294 IT.getValueFor(*Op) = llvm::MCOperand::createImm(Offset); // Disp
295 Op = &IT.Instr.Operands[++I];
Guillaume Chatelet09c28392018-10-09 08:59:10 +0000296 assert(Op->isMemory());
297 assert(Op->isExplicit());
Guillaume Chatelet70ac0192018-09-27 09:23:04 +0000298 IT.getValueFor(*Op) = llvm::MCOperand::createReg(0); // Segment
Guillaume Chateletfb943542018-08-01 14:41:45 +0000299 // Case2: segment:index addressing. We assume that ES is 0.
300 }
301 }
302 }
303
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000304 std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
305 unsigned Reg,
306 const llvm::APInt &Value) const override {
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000307 if (llvm::X86::GR8RegClass.contains(Reg))
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000308 return {loadImmediate(Reg, 8, Value)};
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000309 if (llvm::X86::GR16RegClass.contains(Reg))
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000310 return {loadImmediate(Reg, 16, Value)};
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000311 if (llvm::X86::GR32RegClass.contains(Reg))
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000312 return {loadImmediate(Reg, 32, Value)};
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000313 if (llvm::X86::GR64RegClass.contains(Reg))
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000314 return {loadImmediate(Reg, 64, Value)};
315 ConstantInliner CI(Value);
Guillaume Chatelet5ad29092018-09-18 11:26:27 +0000316 if (llvm::X86::VR64RegClass.contains(Reg))
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000317 return CI.loadAndFinalize(Reg, 64, llvm::X86::MMX_MOVQ64rm);
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000318 if (llvm::X86::VR128XRegClass.contains(Reg)) {
319 if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000320 return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQU32Z128rm);
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000321 if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000322 return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQUrm);
323 return CI.loadAndFinalize(Reg, 128, llvm::X86::MOVDQUrm);
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000324 }
325 if (llvm::X86::VR256XRegClass.contains(Reg)) {
326 if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000327 return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQU32Z256rm);
328 if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
329 return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQUYrm);
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000330 }
331 if (llvm::X86::VR512RegClass.contains(Reg))
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000332 if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
333 return CI.loadAndFinalize(Reg, 512, llvm::X86::VMOVDQU32Zrm);
334 if (llvm::X86::RSTRegClass.contains(Reg)) {
Clement Courbetc51f4522018-10-19 09:56:54 +0000335 return CI.loadX87STAndFinalize(Reg);
336 }
337 if (llvm::X86::RFP32RegClass.contains(Reg) ||
338 llvm::X86::RFP64RegClass.contains(Reg) ||
339 llvm::X86::RFP80RegClass.contains(Reg)) {
340 return CI.loadX87FPAndFinalize(Reg);
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000341 }
Guillaume Chateletc96a97b2018-09-20 12:22:18 +0000342 if (Reg == llvm::X86::EFLAGS)
343 return CI.popFlagAndFinalize();
344 return {}; // Not yet implemented.
Clement Courbeta51efc22018-06-25 13:12:02 +0000345 }
346
Clement Courbetd939f6d2018-09-13 07:40:53 +0000347 std::unique_ptr<SnippetGenerator>
348 createLatencySnippetGenerator(const LLVMState &State) const override {
Guillaume Chatelet946fb052018-10-12 15:12:22 +0000349 return llvm::make_unique<X86LatencySnippetGenerator>(State);
Clement Courbet4860b982018-06-26 08:49:30 +0000350 }
351
Clement Courbetd939f6d2018-09-13 07:40:53 +0000352 std::unique_ptr<SnippetGenerator>
353 createUopsSnippetGenerator(const LLVMState &State) const override {
Guillaume Chatelet946fb052018-10-12 15:12:22 +0000354 return llvm::make_unique<X86UopsSnippetGenerator>(State);
Clement Courbet4860b982018-06-26 08:49:30 +0000355 }
356
Clement Courbet44b4c542018-06-19 11:28:59 +0000357 bool matchesArch(llvm::Triple::ArchType Arch) const override {
358 return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
359 }
360};
361
362} // namespace
363
Clement Courbetcff2caa2018-06-25 11:22:23 +0000364static ExegesisTarget *getTheExegesisX86Target() {
Clement Courbet44b4c542018-06-19 11:28:59 +0000365 static ExegesisX86Target Target;
366 return &Target;
367}
368
369void InitializeX86ExegesisTarget() {
370 ExegesisTarget::registerTarget(getTheExegesisX86Target());
371}
372
Clement Courbetcff2caa2018-06-25 11:22:23 +0000373} // namespace exegesis