blob: e55e77d876ccb20107a2e23b07a3ece99876850b [file] [log] [blame]
//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "../Target.h"

#include "../Latency.h"
#include "../Uops.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/MC/MCInstBuilder.h"

#include <cassert>
Clement Courbet6fd00e32018-06-20 11:54:35 +000019
Clement Courbet44b4c542018-06-19 11:28:59 +000020namespace exegesis {
21
22namespace {
23
Clement Courbet717c9762018-06-28 07:41:16 +000024// Common code for X86 Uops and Latency runners.
Clement Courbetd939f6d2018-09-13 07:40:53 +000025template <typename Impl> class X86SnippetGenerator : public Impl {
Clement Courbet717c9762018-06-28 07:41:16 +000026 using Impl::Impl;
Clement Courbet4860b982018-06-26 08:49:30 +000027
Guillaume Chatelete60866a2018-08-03 09:29:38 +000028 llvm::Expected<CodeTemplate>
29 generateCodeTemplate(unsigned Opcode) const override {
Clement Courbet717c9762018-06-28 07:41:16 +000030 // Test whether we can generate a snippet for this instruction.
31 const auto &InstrInfo = this->State.getInstrInfo();
32 const auto OpcodeName = InstrInfo.getName(Opcode);
33 if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
34 OpcodeName.startswith("ADJCALLSTACK")) {
35 return llvm::make_error<BenchmarkFailure>(
36 "Unsupported opcode: Push/Pop/AdjCallStack");
Clement Courbet4860b982018-06-26 08:49:30 +000037 }
Clement Courbet717c9762018-06-28 07:41:16 +000038
39 // Handle X87.
40 const auto &InstrDesc = InstrInfo.get(Opcode);
41 const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask;
42 const Instruction Instr(InstrDesc, this->RATC);
43 switch (FPInstClass) {
44 case llvm::X86II::NotFP:
45 break;
46 case llvm::X86II::ZeroArgFP:
Clement Courbetf9a0bb32018-07-05 13:54:51 +000047 return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
Clement Courbet717c9762018-06-28 07:41:16 +000048 case llvm::X86II::OneArgFP:
Clement Courbetf9a0bb32018-07-05 13:54:51 +000049 return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
Clement Courbet717c9762018-06-28 07:41:16 +000050 case llvm::X86II::OneArgFPRW:
51 case llvm::X86II::TwoArgFP: {
52 // These are instructions like
53 // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
54 // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
55 // They are intrinsically serial and do not modify the state of the stack.
56 // We generate the same code for latency and uops.
Guillaume Chatelete60866a2018-08-03 09:29:38 +000057 return this->generateSelfAliasingCodeTemplate(Instr);
Clement Courbet717c9762018-06-28 07:41:16 +000058 }
59 case llvm::X86II::CompareFP:
60 return Impl::handleCompareFP(Instr);
61 case llvm::X86II::CondMovFP:
62 return Impl::handleCondMovFP(Instr);
63 case llvm::X86II::SpecialFP:
Clement Courbetf9a0bb32018-07-05 13:54:51 +000064 return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
Clement Courbet717c9762018-06-28 07:41:16 +000065 default:
66 llvm_unreachable("Unknown FP Type!");
67 }
68
69 // Fallback to generic implementation.
Guillaume Chatelete60866a2018-08-03 09:29:38 +000070 return Impl::Base::generateCodeTemplate(Opcode);
Clement Courbet4860b982018-06-26 08:49:30 +000071 }
72};
73
Clement Courbetd939f6d2018-09-13 07:40:53 +000074class X86LatencyImpl : public LatencySnippetGenerator {
Clement Courbet717c9762018-06-28 07:41:16 +000075protected:
Clement Courbetd939f6d2018-09-13 07:40:53 +000076 using Base = LatencySnippetGenerator;
Clement Courbet717c9762018-06-28 07:41:16 +000077 using Base::Base;
Guillaume Chatelete60866a2018-08-03 09:29:38 +000078 llvm::Expected<CodeTemplate> handleCompareFP(const Instruction &Instr) const {
Clement Courbetd939f6d2018-09-13 07:40:53 +000079 return llvm::make_error<SnippetGeneratorFailure>(
80 "Unsupported x87 CompareFP");
Clement Courbet717c9762018-06-28 07:41:16 +000081 }
Guillaume Chatelete60866a2018-08-03 09:29:38 +000082 llvm::Expected<CodeTemplate> handleCondMovFP(const Instruction &Instr) const {
Clement Courbetd939f6d2018-09-13 07:40:53 +000083 return llvm::make_error<SnippetGeneratorFailure>(
84 "Unsupported x87 CondMovFP");
Clement Courbet717c9762018-06-28 07:41:16 +000085 }
Clement Courbet717c9762018-06-28 07:41:16 +000086};
87
Clement Courbetd939f6d2018-09-13 07:40:53 +000088class X86UopsImpl : public UopsSnippetGenerator {
Clement Courbet717c9762018-06-28 07:41:16 +000089protected:
Clement Courbetd939f6d2018-09-13 07:40:53 +000090 using Base = UopsSnippetGenerator;
Clement Courbet717c9762018-06-28 07:41:16 +000091 using Base::Base;
Clement Courbetf9a0bb32018-07-05 13:54:51 +000092 // We can compute uops for any FP instruction that does not grow or shrink the
93 // stack (either do not touch the stack or push as much as they pop).
Guillaume Chatelete60866a2018-08-03 09:29:38 +000094 llvm::Expected<CodeTemplate> handleCompareFP(const Instruction &Instr) const {
95 return generateUnconstrainedCodeTemplate(
Clement Courbetf9a0bb32018-07-05 13:54:51 +000096 Instr, "instruction does not grow/shrink the FP stack");
Clement Courbet717c9762018-06-28 07:41:16 +000097 }
Guillaume Chatelete60866a2018-08-03 09:29:38 +000098 llvm::Expected<CodeTemplate> handleCondMovFP(const Instruction &Instr) const {
99 return generateUnconstrainedCodeTemplate(
Clement Courbetf9a0bb32018-07-05 13:54:51 +0000100 Instr, "instruction does not grow/shrink the FP stack");
Clement Courbet4860b982018-06-26 08:49:30 +0000101 }
102};
103
Simon Pilgrim02426892018-09-18 15:35:49 +0000104static unsigned GetLoadImmediateOpcode(const llvm::APInt &Value) {
105 switch (Value.getBitWidth()) {
Guillaume Chatelet5ad29092018-09-18 11:26:27 +0000106 case 8:
107 return llvm::X86::MOV8ri;
108 case 16:
109 return llvm::X86::MOV16ri;
110 case 32:
111 return llvm::X86::MOV32ri;
112 case 64:
113 return llvm::X86::MOV64ri;
114 }
115 llvm_unreachable("Invalid Value Width");
116}
117
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000118static llvm::MCInst loadImmediate(unsigned Reg, const llvm::APInt &Value) {
Simon Pilgrim02426892018-09-18 15:35:49 +0000119 return llvm::MCInstBuilder(GetLoadImmediateOpcode(Value))
Guillaume Chatelet5ad29092018-09-18 11:26:27 +0000120 .addReg(Reg)
121 .addImm(Value.getZExtValue());
122}
123
124// Allocates scratch memory on the stack.
125static llvm::MCInst allocateStackSpace(unsigned Bytes) {
126 return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
127 .addReg(llvm::X86::RSP)
128 .addReg(llvm::X86::RSP)
129 .addImm(Bytes);
130}
131
132// Fills scratch memory at offset `OffsetBytes` with value `Imm`.
133static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
134 uint64_t Imm) {
135 return llvm::MCInstBuilder(MovOpcode)
136 // Address = ESP
137 .addReg(llvm::X86::RSP) // BaseReg
138 .addImm(1) // ScaleAmt
139 .addReg(0) // IndexReg
140 .addImm(OffsetBytes) // Disp
141 .addReg(0) // Segment
142 // Immediate.
143 .addImm(Imm);
144}
145
146// Loads scratch memory into register `Reg` using opcode `RMOpcode`.
147static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
148 return llvm::MCInstBuilder(RMOpcode)
149 .addReg(Reg)
150 // Address = ESP
151 .addReg(llvm::X86::RSP) // BaseReg
152 .addImm(1) // ScaleAmt
153 .addReg(0) // IndexReg
154 .addImm(0) // Disp
155 .addReg(0); // Segment
156}
157
158// Releases scratch memory.
159static llvm::MCInst releaseStackSpace(unsigned Bytes) {
160 return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
161 .addReg(llvm::X86::RSP)
162 .addReg(llvm::X86::RSP)
163 .addImm(Bytes);
164}
165
166struct ConstantInliner {
167 explicit ConstantInliner(const llvm::APInt &Constant)
168 : StackSize(Constant.getBitWidth() / 8) {
169 assert(Constant.getBitWidth() % 8 == 0 && "Must be a multiple of 8");
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000170 Add(allocateStackSpace(StackSize));
Guillaume Chatelet5ad29092018-09-18 11:26:27 +0000171 size_t ByteOffset = 0;
172 for (; StackSize - ByteOffset >= 4; ByteOffset += 4)
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000173 Add(fillStackSpace(
Guillaume Chatelet5ad29092018-09-18 11:26:27 +0000174 llvm::X86::MOV32mi, ByteOffset,
175 Constant.extractBits(32, ByteOffset * 8).getZExtValue()));
176 if (StackSize - ByteOffset >= 2) {
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000177 Add(fillStackSpace(
Guillaume Chatelet5ad29092018-09-18 11:26:27 +0000178 llvm::X86::MOV16mi, ByteOffset,
179 Constant.extractBits(16, ByteOffset * 8).getZExtValue()));
180 ByteOffset += 2;
181 }
182 if (StackSize - ByteOffset >= 1)
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000183 Add(fillStackSpace(
Guillaume Chatelet5ad29092018-09-18 11:26:27 +0000184 llvm::X86::MOV8mi, ByteOffset,
185 Constant.extractBits(8, ByteOffset * 8).getZExtValue()));
186 }
187
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000188 ConstantInliner &Add(const llvm::MCInst &Inst) {
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000189 Instructions.push_back(Inst);
190 return *this;
191 }
192
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000193 std::vector<llvm::MCInst> finalize() {
194 Add(releaseStackSpace(StackSize));
195 return std::move(Instructions);
196 }
197
198private:
Guillaume Chatelet5ad29092018-09-18 11:26:27 +0000199 const size_t StackSize;
200 std::vector<llvm::MCInst> Instructions;
201};
202
Clement Courbet44b4c542018-06-19 11:28:59 +0000203class ExegesisX86Target : public ExegesisTarget {
Clement Courbet6fd00e32018-06-20 11:54:35 +0000204 void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
205 // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
Clement Courbet717c9762018-06-28 07:41:16 +0000206 PM.add(llvm::createX86FloatingPointStackifierPass());
Clement Courbet6fd00e32018-06-20 11:54:35 +0000207 }
208
Guillaume Chateletfb943542018-08-01 14:41:45 +0000209 unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override {
210 if (!TT.isArch64Bit()) {
211 // FIXME: This would require popping from the stack, so we would have to
212 // add some additional setup code.
213 return 0;
214 }
215 return TT.isOSWindows() ? llvm::X86::RCX : llvm::X86::RDI;
216 }
217
218 unsigned getMaxMemoryAccessSize() const override { return 64; }
219
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000220 void fillMemoryOperands(InstructionBuilder &IB, unsigned Reg,
Guillaume Chateletfb943542018-08-01 14:41:45 +0000221 unsigned Offset) const override {
222 // FIXME: For instructions that read AND write to memory, we use the same
223 // value for input and output.
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000224 for (size_t I = 0, E = IB.Instr.Operands.size(); I < E; ++I) {
225 const Operand *Op = &IB.Instr.Operands[I];
Guillaume Chateletfb943542018-08-01 14:41:45 +0000226 if (Op->IsExplicit && Op->IsMem) {
227 // Case 1: 5-op memory.
228 assert((I + 5 <= E) && "x86 memory references are always 5 ops");
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000229 IB.getValueFor(*Op) = llvm::MCOperand::createReg(Reg); // BaseReg
230 Op = &IB.Instr.Operands[++I];
Guillaume Chateletfb943542018-08-01 14:41:45 +0000231 assert(Op->IsMem);
232 assert(Op->IsExplicit);
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000233 IB.getValueFor(*Op) = llvm::MCOperand::createImm(1); // ScaleAmt
234 Op = &IB.Instr.Operands[++I];
Guillaume Chateletfb943542018-08-01 14:41:45 +0000235 assert(Op->IsMem);
236 assert(Op->IsExplicit);
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000237 IB.getValueFor(*Op) = llvm::MCOperand::createReg(0); // IndexReg
238 Op = &IB.Instr.Operands[++I];
Guillaume Chateletfb943542018-08-01 14:41:45 +0000239 assert(Op->IsMem);
240 assert(Op->IsExplicit);
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000241 IB.getValueFor(*Op) = llvm::MCOperand::createImm(Offset); // Disp
242 Op = &IB.Instr.Operands[++I];
Guillaume Chateletfb943542018-08-01 14:41:45 +0000243 assert(Op->IsMem);
244 assert(Op->IsExplicit);
Guillaume Chatelet171f3f42018-08-02 11:12:02 +0000245 IB.getValueFor(*Op) = llvm::MCOperand::createReg(0); // Segment
Guillaume Chateletfb943542018-08-01 14:41:45 +0000246 // Case2: segment:index addressing. We assume that ES is 0.
247 }
248 }
249 }
250
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000251 std::vector<llvm::MCInst> setRegToConstant(const llvm::MCSubtargetInfo &STI,
252 unsigned Reg) const override {
253 // GPR.
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000254 if (llvm::X86::GR8RegClass.contains(Reg))
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000255 return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)};
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000256 if (llvm::X86::GR16RegClass.contains(Reg))
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000257 return {llvm::MCInstBuilder(llvm::X86::MOV16ri).addReg(Reg).addImm(1)};
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000258 if (llvm::X86::GR32RegClass.contains(Reg))
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000259 return {llvm::MCInstBuilder(llvm::X86::MOV32ri).addReg(Reg).addImm(1)};
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000260 if (llvm::X86::GR64RegClass.contains(Reg))
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000261 return {llvm::MCInstBuilder(llvm::X86::MOV64ri32).addReg(Reg).addImm(1)};
262 // MMX.
Guillaume Chatelet5ad29092018-09-18 11:26:27 +0000263 if (llvm::X86::VR64RegClass.contains(Reg))
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000264 return setVectorRegToConstant(Reg, 8, llvm::X86::MMX_MOVQ64rm);
265 // {X,Y,Z}MM.
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000266 if (llvm::X86::VR128XRegClass.contains(Reg)) {
267 if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000268 return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQU32Z128rm);
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000269 if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000270 return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQUrm);
271 return setVectorRegToConstant(Reg, 16, llvm::X86::MOVDQUrm);
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000272 }
273 if (llvm::X86::VR256XRegClass.contains(Reg)) {
274 if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000275 return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQU32Z256rm);
276 return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQUYrm);
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000277 }
278 if (llvm::X86::VR512RegClass.contains(Reg))
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000279 return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU32Zrm);
280 // X87.
281 if (llvm::X86::RFP32RegClass.contains(Reg) ||
282 llvm::X86::RFP64RegClass.contains(Reg) ||
283 llvm::X86::RFP80RegClass.contains(Reg))
284 return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
285 if (Reg == llvm::X86::EFLAGS) {
286 // Set all flags to 0 but the bits that are "reserved and set to 1".
287 constexpr const uint32_t kImmValue = 0x00007002u;
288 std::vector<llvm::MCInst> Result;
289 Result.push_back(allocateStackSpace(8));
290 Result.push_back(fillStackSpace(llvm::X86::MOV64mi32, 0, kImmValue));
291 Result.push_back(llvm::MCInstBuilder(llvm::X86::POPF64)); // Also pops.
292 return Result;
Guillaume Chatelet8721ad92018-09-18 11:26:35 +0000293 }
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000294 llvm_unreachable("Not yet implemented");
295 }
296
297 std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
298 const llvm::APInt &Value,
299 unsigned Reg) const override {
300 if (llvm::X86::GR8RegClass.contains(Reg) ||
301 llvm::X86::GR16RegClass.contains(Reg) ||
302 llvm::X86::GR32RegClass.contains(Reg) ||
303 llvm::X86::GR64RegClass.contains(Reg))
304 return {loadImmediate(Reg, Value)};
305 ConstantInliner CI(Value);
306 if (llvm::X86::VR64RegClass.contains(Reg))
307 return CI.Add(loadToReg(Reg, llvm::X86::MMX_MOVQ64rm)).finalize();
Simon Pilgrim02426892018-09-18 15:35:49 +0000308 llvm_unreachable("Not yet implemented");
Clement Courbeta51efc22018-06-25 13:12:02 +0000309 }
310
Clement Courbetd939f6d2018-09-13 07:40:53 +0000311 std::unique_ptr<SnippetGenerator>
312 createLatencySnippetGenerator(const LLVMState &State) const override {
313 return llvm::make_unique<X86SnippetGenerator<X86LatencyImpl>>(State);
Clement Courbet4860b982018-06-26 08:49:30 +0000314 }
315
Clement Courbetd939f6d2018-09-13 07:40:53 +0000316 std::unique_ptr<SnippetGenerator>
317 createUopsSnippetGenerator(const LLVMState &State) const override {
318 return llvm::make_unique<X86SnippetGenerator<X86UopsImpl>>(State);
Clement Courbet4860b982018-06-26 08:49:30 +0000319 }
320
Clement Courbet44b4c542018-06-19 11:28:59 +0000321 bool matchesArch(llvm::Triple::ArchType Arch) const override {
322 return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
323 }
Simon Pilgrimf652ef32018-09-18 15:38:16 +0000324
325private:
326 // setRegToConstant() specialized for a vector register of size
327 // `RegSizeBytes`. `RMOpcode` is the opcode used to do a memory -> vector
328 // register load.
329 static std::vector<llvm::MCInst>
330 setVectorRegToConstant(const unsigned Reg, const unsigned RegSizeBytes,
331 const unsigned RMOpcode) {
332 // There is no instruction to directly set XMM, go through memory.
333 // Since vector values can be interpreted as integers of various sizes (8
334 // to 64 bits) as well as floats and double, so we chose an immediate
335 // value that has set bits for all byte values and is a normal float/
336 // double. 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when
337 // interpreted as a float.
338 constexpr const uint32_t kImmValue = 0x40404040u;
339 std::vector<llvm::MCInst> Result;
340 Result.push_back(allocateStackSpace(RegSizeBytes));
341 constexpr const unsigned kMov32NumBytes = 4;
342 for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += kMov32NumBytes) {
343 Result.push_back(fillStackSpace(llvm::X86::MOV32mi, Disp, kImmValue));
344 }
345 Result.push_back(loadToReg(Reg, RMOpcode));
346 Result.push_back(releaseStackSpace(RegSizeBytes));
347 return Result;
348 }
Clement Courbet44b4c542018-06-19 11:28:59 +0000349};
350
351} // namespace
352
Clement Courbetcff2caa2018-06-25 11:22:23 +0000353static ExegesisTarget *getTheExegesisX86Target() {
Clement Courbet44b4c542018-06-19 11:28:59 +0000354 static ExegesisX86Target Target;
355 return &Target;
356}
357
358void InitializeX86ExegesisTarget() {
359 ExegesisTarget::registerTarget(getTheExegesisX86Target());
360}
361
Clement Courbetcff2caa2018-06-25 11:22:23 +0000362} // namespace exegesis