//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
      ,AMDGPUASI(STI.getAMDGPUAS())
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

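// Return the 32-bit half (sub0 or sub1) of a 64-bit operand. Register
// operands get a COPY of the requested subregister into a fresh 32-bit SGPR;
// immediate operands are split into their low or high 32 bits.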
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

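// Select a 64-bit scalar G_ADD as an S_ADD_U32/S_ADDC_U32 pair over the low
// and high halves, recombined with a REG_SEQUENCE. Other sizes are rejected.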
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
          .add(Lo1)
          .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
          .add(Hi1)
          .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
          .addReg(DstLo)
          .addImm(AMDGPU::sub0)
          .addReg(DstHi)
          .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

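// At this point G_GEP is just pointer arithmetic, so select it like an add.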
bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

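// Stores are currently always selected as FLAT_STORE_DWORD; see the FIXME
// below about choosing the opcode from the address space.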
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  DebugLoc DL = I.getDebugLoc();

  // FIXME: Select store instruction based on address space
  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0); // slc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

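// Materialize a constant: 32-bit values become a single S_MOV_B32; 64-bit
// values are built from two S_MOV_B32s joined with a REG_SEQUENCE.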
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (Size == 32) {
    I.setDesc(TII.get(AMDGPU::S_MOV_B32));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  assert(Size == 64);

  DebugLoc DL = I.getDebugLoc();
  unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  const APInt &Imm = I.getOperand(1).getCImm()->getValue();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
          .addReg(LoReg)
          .addImm(AMDGPU::sub0)
          .addReg(HiReg)
          .addImm(AMDGPU::sub1);
  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

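// Walk the chain of G_GEPs feeding the load's address and record, for each
// one, its constant offset together with its SGPR and VGPR components.
// selectSMRD uses this information to pick an addressing mode.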
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

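// Return true if the memory access is known to be uniform, and therefore
// safe to select as a scalar (SMRD) load.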
static bool isInstrUniform(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

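// Map a 32-bit SMRD base opcode to the variant for LoadSize (in bits).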
static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {

  if (LoadSize == 32)
    return BaseOpcode;

  switch (BaseOpcode) {
  case AMDGPU::S_LOAD_DWORD_IMM:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_IMM_ci:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_SGPR:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_SGPR;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_SGPR;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_SGPR;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_SGPR;
    }
    break;
  }
  llvm_unreachable("Invalid base smrd opcode or size");
}

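// Return true if any recorded G_GEP has a VGPR address component.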
bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

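// Try to select a uniform, constant-address load as a scalar (SMRD) load.
// Depending on the offset this uses the immediate form, the CI-only 32-bit
// immediate form, or an offset materialized into an SGPR.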
bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
                                           ArrayRef<GEPInfo> AddrInfo) const {

  if (!I.hasOneMemOperand())
    return false;

  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
      (*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT)
    return false;

  if (!isInstrUniform(I))
    return false;

  if (hasVgprParts(AddrInfo))
    return false;

  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Opcode;
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {

    const GEPInfo &GEPInfo = AddrInfo[0];

    unsigned PtrReg = GEPInfo.SgprParts[0];
    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                 .addReg(PtrReg)
                                 .addImm(EncodedImm)
                                 .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
        isUInt<32>(EncodedImm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                 .addReg(PtrReg)
                                 .addImm(EncodedImm)
                                 .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (isUInt<32>(GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
              .addImm(GEPInfo.Imm);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                 .addReg(PtrReg)
                                 .addReg(OffsetReg)
                                 .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }
  }

  unsigned PtrReg = I.getOperand(1).getReg();
  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                             .addReg(PtrReg)
                             .addImm(0)
                             .addImm(0); // glc
  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
}

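// Select G_LOAD: prefer a scalar SMRD load when the address is uniform,
// otherwise fall back to a FLAT load of the appropriate width.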
bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  if (selectSMRD(I, AddrInfo)) {
    I.eraseFromParent();
    return true;
  }

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                             .add(I.getOperand(0))
                             .addReg(PtrReg)
                             .addImm(0)  // offset
                             .addImm(0)  // glc
                             .addImm(0); // slc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

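// Main entry point: dispatch a generic opcode either to the TableGen'erated
// selector (selectImpl) or to one of the hand-written handlers above.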
bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  if (!isPreISelGenericOpcode(I.getOpcode()))
    return true;

  switch (I.getOpcode()) {
  default:
    break;
  case TargetOpcode::G_OR:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_CONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_LOAD:
    return selectG_LOAD(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  }
  return false;
}

///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}