//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

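// Pull in the TableGen'erated parts of the selector. The same .inc file is
// included several times under different guard macros: here it provides the
// selectImpl() implementation, and in the constructor below it provides the
// predicate and temporary initializers.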
#define GET_GLOBALISEL_IMPL
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
      ,AMDGPUASI(STI.getAMDGPUAS())
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

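// Rewrite a generic bitcast/copy as a plain COPY and constrain each virtual
// register operand to a concrete register class derived from its operand,
// so downstream passes no longer see generic registers.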
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));
  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

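// Return a 32-bit half of a 64-bit operand. For register operands this
// emits a COPY from the requested subregister; for immediates it returns
// the corresponding half of the value, e.g. splitting 0x1122334455667788
// yields 0x55667788 for sub0 and 0x11223344 for sub1.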
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

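// Select a 64-bit G_ADD. Scalar ALU adds are only 32 bits wide, so the add
// is split into an S_ADD_U32 of the low halves and an S_ADDC_U32 of the
// high halves (S_ADD_U32 writes its carry-out to SCC, which S_ADDC_U32
// consumes as carry-in), then reassembled with a REG_SEQUENCE. Roughly:
//   %lo  = S_ADD_U32  %a.sub0, %b.sub0
//   %hi  = S_ADDC_U32 %a.sub1, %b.sub1
//   %dst = REG_SEQUENCE %lo, sub0, %hi, sub1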
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
      .addReg(DstLo)
      .addImm(AMDGPU::sub0)
      .addReg(DstHi)
      .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

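// Select a generic store. Only 32-bit flat stores are handled so far; note
// that FLAT_STORE_DWORD takes the address operand first and the data second,
// hence operand 1 (the pointer) is added before operand 0 (the value).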
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  DebugLoc DL = I.getDebugLoc();

  // FIXME: Select store instruction based on address space
  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
      .add(I.getOperand(1))
      .add(I.getOperand(0))
      .addImm(0)  // offset
      .addImm(0)  // glc
      .addImm(0); // slc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

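// Materialize a scalar constant. A 32-bit value becomes a single S_MOV_B32.
// A 64-bit value is split into two 32-bit moves that are recombined with a
// REG_SEQUENCE, e.g. for the constant 0x100000002:
//   %lo  = S_MOV_B32 2
//   %hi  = S_MOV_B32 1
//   %dst = REG_SEQUENCE %lo, sub0, %hi, sub1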
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (Size == 32) {
    I.setDesc(TII.get(AMDGPU::S_MOV_B32));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  assert(Size == 64);

  DebugLoc DL = I.getDebugLoc();
  unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  const APInt &Imm = I.getOperand(1).getCImm()->getValue();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg)
      .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
      .addImm(Imm.ashr(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(HiReg)
      .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target-independent opcodes.
  I.eraseFromParent();
  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

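// Walk the chain of G_GEPs feeding a load's address operand, recording for
// each one its constant offset and which register parts come from SGPRs vs.
// VGPRs. selectSMRD() consumes this to decide whether the address can be
// formed with scalar instructions.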
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

static bool isInstrUniform(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

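// Widen a base 32-bit SMRD opcode to the variant matching the load size,
// e.g. S_LOAD_DWORD_IMM with a 128-bit load becomes S_LOAD_DWORDX4_IMM.
// Sizes other than 32/64/128/256/512 bits are not valid SMRD loads.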
static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {

  if (LoadSize == 32)
    return BaseOpcode;

  switch (BaseOpcode) {
  case AMDGPU::S_LOAD_DWORD_IMM:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_IMM_ci:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_SGPR:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_SGPR;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_SGPR;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_SGPR;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_SGPR;
    }
    break;
  }
  llvm_unreachable("Invalid base smrd opcode or size");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

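// Try to select a uniform load from the constant address space as a scalar
// (SMRD) load. Three addressing forms are tried in order: a legal encoded
// immediate offset, the CI-only 32-bit immediate encoding, and a register
// offset materialized with S_MOV_B32. Returns false if the load has to stay
// on the vector memory path (e.g. any part of the address is in a VGPR).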
bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
                                           ArrayRef<GEPInfo> AddrInfo) const {

  if (!I.hasOneMemOperand())
    return false;

  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
      (*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT)
    return false;

  if (!isInstrUniform(I))
    return false;

  if (hasVgprParts(AddrInfo))
    return false;

  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Opcode;
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {

    const GEPInfo &GEPInfo = AddrInfo[0];

    unsigned PtrReg = GEPInfo.SgprParts[0];
    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
          .addReg(PtrReg)
          .addImm(EncodedImm)
          .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
        isUInt<32>(EncodedImm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
          .addReg(PtrReg)
          .addImm(EncodedImm)
          .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (isUInt<32>(GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
          .addReg(PtrReg)
          .addReg(OffsetReg)
          .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }
  }

  unsigned PtrReg = I.getOperand(1).getReg();
  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
      .addReg(PtrReg)
      .addImm(0)
      .addImm(0); // glc
  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
}

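// Select a generic load. The scalar path via selectSMRD() is preferred;
// loads that do not qualify fall back to a flat load of matching width
// (only 32- and 64-bit loads are handled so far).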
bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  if (selectSMRD(I, AddrInfo)) {
    I.eraseFromParent();
    return true;
  }

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
      .add(I.getOperand(0))
      .addReg(PtrReg)
      .addImm(0)  // offset
      .addImm(0)  // glc
      .addImm(0); // slc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

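// Main selection entry point: dispatch on the generic opcode. G_OR is
// handled by the TableGen'erated selectImpl(); the others use the
// hand-written helpers above. Returning false signals that selection failed
// for this instruction.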
bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  if (!isPreISelGenericOpcode(I.getOpcode()))
    return true;

  switch (I.getOpcode()) {
  default:
    break;
  case TargetOpcode::G_OR:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_LOAD:
    return selectG_LOAD(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  }
  return false;
}

///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}