//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

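// Return true if \p Reg is the physical SCC register or a virtual register
// assigned to the SCC register bank. The SCC bank is modeled with 32-bit
// SGPRs carrying a 1-bit value.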
static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
  if (Reg == AMDGPU::SCC)
    return true;

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return false;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC)
    return RC->getID() == AMDGPU::SReg_32_XM0RegClassID &&
           MRI.getType(Reg).getSizeInBits() == 1;

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

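// Select a COPY by constraining its register operands to the appropriate
// target classes. A copy from the scc bank into a 64-bit destination is
// expanded to V_CMP_NE_U32_e64 so the result lands in vcc.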
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  // Special case for COPY from the scc register bank. The scc register bank
  // is modeled using 32-bit sgprs.
  const MachineOperand &Src = I.getOperand(1);
  unsigned SrcReg = Src.getReg();
  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
    unsigned DstReg = I.getOperand(0).getReg();
    unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI);

    // We have a copy from a 32-bit to 64-bit register. This happens
    // when we are selecting scc->vcc copies.
    if (DstSize == 64) {
      const DebugLoc &DL = I.getDebugLoc();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
          .addImm(0)
          .addReg(SrcReg);
      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }
  }

  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

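// Produce a 32-bit operand for one half of a 64-bit operand: for a register
// operand the requested sub-register is copied into a fresh SGPR_32; for an
// immediate the corresponding 32-bit half is returned directly.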
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

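// Select a 64-bit scalar G_ADD as an S_ADD_U32/S_ADDC_U32 pair over the two
// 32-bit halves, recombined with a REG_SEQUENCE. Roughly:
//   %dst(s64) = G_ADD %a, %b
// becomes
//   %lo = S_ADD_U32  %a.sub0, %b.sub0
//   %hi = S_ADDC_U32 %a.sub1, %b.sub1
//   %dst = REG_SEQUENCE %lo, sub0, %hi, sub1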
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
          .add(Lo1)
          .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
          .add(Hi1)
          .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
          .addReg(DstLo)
          .addImm(AMDGPU::sub0)
          .addReg(DstHi)
          .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

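// Select a G_EXTRACT with a 32-bit-aligned offset as a sub-register copy.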
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                              .addDef(I.getOperand(0).getReg())
                              .addReg(I.getOperand(1).getReg())
                              .addReg(I.getOperand(2).getReg())
                              .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
                                          CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  default:
    break;
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);

  case Intrinsic::amdgcn_kernarg_segment_ptr: {
    MachineFunction *MF = I.getParent()->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    const ArgDescriptor *InputPtrReg;
    const TargetRegisterClass *RC;
    const DebugLoc &DL = I.getDebugLoc();

    std::tie(InputPtrReg, RC)
      = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    if (!InputPtrReg)
      report_fatal_error("missing kernarg segment ptr");

    BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
      .add(I.getOperand(0))
      .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
    I.eraseFromParent();
    return true;
  }
  }
  return false;
}

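// Map an integer predicate to the corresponding VALU (V_CMP_*_e64) compare
// opcode for 32-bit or 64-bit operands.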
static unsigned getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  assert(Size == 32 || Size == 64);
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

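// Map an integer predicate to the corresponding scalar (S_CMP_*) compare
// opcode. Only 32-bit operands are handled here.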
static unsigned getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  // FIXME: VI supports 64-bit compares.
  assert(Size == 32);
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  }
}

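// Select G_ICMP: a compare whose result lives in the scc bank uses an
// S_CMP_* instruction followed by a copy from SCC; otherwise a V_CMP_*_e64
// writing a 64-bit condition register is emitted.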
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
  // FIXME: VI supports 64-bit compares.
  assert(Size == 32);

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned Opcode =
        getS_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
            .addReg(AMDGPU::SCC);
    bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*Copy, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  assert(Size == 32 || Size == 64);
  unsigned Opcode =
      getV_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               AMDGPU::SReg_64RegClass, MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

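// Emit an EXP or EXP_DONE export instruction with the given target, source
// registers, and control bits.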
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

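// Select intrinsics with side effects. Currently this handles the export
// intrinsics llvm.amdgcn.exp and llvm.amdgcn.exp.compr.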
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I,
    CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  }
  return false;
}

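// Select G_SELECT. A condition in the scc bank is copied into SCC and
// selected with S_CSELECT_B32/B64; 32-bit vector selects use
// V_CNDMASK_B32_e64.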
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size == 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 32 ? AMDGPU::S_CSELECT_B32 :
                                         AMDGPU::S_CSELECT_B64;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class we use to
    // represent that bank, so set the register class manually here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // FIXME: Support 64-bit select.
  assert(Size == 32);
  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .addImm(0)
          .add(I.getOperand(3))
          .addImm(0)
          .add(I.getOperand(2))
          .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

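// Select G_STORE as a FLAT store, choosing the FLAT_STORE_DWORD* variant by
// the store size. Address-space-specific selection is still a FIXME.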
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

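// Return the sub-register index covering the low Size bits of a register, or
// -1 if the size is not supported.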
static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

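// Select G_TRUNC as a (sub-register) COPY, provided the source and
// destination live on the same register bank.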
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

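// Select G_CONSTANT/G_FCONSTANT into S_MOV_B32 or V_MOV_B32_e32 depending on
// the destination bank; 64-bit constants are split into two 32-bit moves and
// recombined with a REG_SEQUENCE.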
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
          .addReg(LoReg)
          .addImm(AMDGPU::sub0)
          .addReg(HiReg)
          .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

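// Collect addressing-mode information for a load whose pointer is defined by
// a G_GEP: the constant offset plus the SGPR and VGPR base parts.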
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
  const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(0))
          .addReg(PtrReg)
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

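// Top-level selection entry point: dispatch generic opcodes to the
// hand-written selectors above, falling back to the TableGen'erated
// selectImpl().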
bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    return selectG_ICMP(I);
  case TargetOpcode::G_LOAD:
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // src_mods
  }};
}

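// Complex-pattern renderers for SMRD (scalar memory) addressing. They match a
// (G_GEP base, constant) pointer and fold the constant into the instruction's
// offset operand, either as an encoded immediate, a 32-bit literal, or an
// offset materialized into an SGPR with S_MOV_B32.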
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using an SGPR offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}
970}