//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

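// Return true if \p Reg is the physical scc register, or a 1-bit virtual
// register assigned to the scc register bank (which is modeled with 32-bit
// SGPRs).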
static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

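// Return true if \p Reg is the physical vcc register, or a 1-bit virtual
// register carried in the wave-wide boolean register class or on the vcc
// register bank.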
bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           MRI.getType(Reg).getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

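// Select a generic COPY, constraining virtual register operands to concrete
// register classes. A copy from scc into vcc cannot remain a plain copy; it
// is expanded into a compare that broadcasts the scc bit into a lane mask.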
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  // Special case for COPY from the scc register bank. The scc register bank
  // is modeled using 32-bit sgprs.
  const MachineOperand &Src = I.getOperand(1);
  unsigned SrcReg = Src.getReg();
  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
    unsigned DstReg = I.getOperand(0).getReg();

    // Specially handle scc->vcc copies.
    if (isVCC(DstReg, MRI)) {
      const DebugLoc &DL = I.getDebugLoc();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);
      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }
  }

  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

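// Select G_PHI into a target PHI, deriving the destination register class
// from its type and register bank. Phis of scc values are rejected.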
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}

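// Return the low or high 32-bit half (chosen by \p SubIdx) of a 64-bit
// operand: a subregister copy for register operands, or the corresponding
// half of the value for immediates.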
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

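// Select G_ADD. A 32-bit add maps directly onto a scalar or vector add; a
// 64-bit add is split into a carry-producing low add and a carry-consuming
// high add, recombined with a REG_SEQUENCE.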
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;

  if (Size == 32) {
    if (IsSALU) {
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      I.setDesc(TII.get(AMDGPU::V_ADD_U32_e64));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, MRI) ||
      !RBI.constrainGenericRegister(I.getOperand(1).getReg(), RC, MRI) ||
      !RBI.constrainGenericRegister(I.getOperand(2).getReg(), RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

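// Select G_EXTRACT of a 32-bit-aligned slice as a subregister copy.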
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                               .addDef(I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg())
                               .addReg(I.getOperand(2).getReg())
                               .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

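// Select intrinsics without side effects. Most are handled by the generated
// selector; amdgcn.kernarg.segment.ptr becomes a copy of the preloaded
// kernarg segment pointer.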
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
                                          CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  default:
    break;
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);

  case Intrinsic::amdgcn_kernarg_segment_ptr: {
    MachineFunction *MF = I.getParent()->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    const ArgDescriptor *InputPtrReg;
    const TargetRegisterClass *RC;
    const DebugLoc &DL = I.getDebugLoc();

    std::tie(InputPtrReg, RC)
      = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    if (!InputPtrReg)
      report_fatal_error("missing kernarg segment ptr");

    BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
      .add(I.getOperand(0))
      .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
    I.eraseFromParent();
    return true;
  }
  }
  return false;
}

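// Return the VALU comparison opcode for predicate \p P on a 32- or 64-bit
// source, or -1 for unsupported sizes.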
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

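// Return the SALU comparison opcode for predicate \p P. Only 32-bit compares
// and, on subtargets that have them, 64-bit equality compares are available;
// anything else returns -1.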
int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

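// Select G_ICMP: a scalar S_CMP writing scc when the result lives on the scc
// bank, otherwise a VALU V_CMP producing a lane mask.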
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               AMDGPU::SReg_64RegClass, MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

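// Build an export instruction from its unpacked operands, using EXP_DONE for
// the final export of a shader.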
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

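// Select intrinsics with side effects. The exp intrinsics are built manually
// since their immediate operands arrive as G_CONSTANT virtual registers.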
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I,
    CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  }
  return false;
}

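// Select G_SELECT. An scc condition is copied into scc and selected with
// S_CSELECT; a vcc condition uses V_CNDMASK_B32 and is limited to 32 bits.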
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class that we use
    // to represent it. So we need to set the register class manually here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
              .addImm(0)
              .add(I.getOperand(3))
              .addImm(0)
              .add(I.getOperand(2))
              .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

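// Select G_STORE. Only flat stores of 32 to 128 bits through 64-bit pointers
// are currently handled.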
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
  if (PtrSize != 64) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

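// Return the subregister index covering the low \p Size bits of a wider
// register, or -1 for sizes above 256 bits.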
static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

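// Select G_TRUNC as a (subregister) COPY, provided the source and destination
// live on the same register bank.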
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

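// Select G_SEXT/G_ZEXT/G_ANYEXT based on the source register bank: scc and
// vcc conditions become conditional selects, while SGPR and VGPR sources use
// a bitfield extract, or a cheaper AND when the zero-extension mask is an
// inline immediate.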
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0)        // Offset
      .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

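// Select G_CONSTANT/G_FCONSTANT into scalar or vector moves, splitting 64-bit
// materializations into two 32-bit moves joined by a REG_SEQUENCE.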
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

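// Walk the chain of G_GEPs feeding a load/store pointer and record, for each
// step, the constant offset and the SGPR/VGPR address components.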
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts? Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

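// Return true if a memory access is known to be uniform across the wave,
// judging by its pointer value, address space, or amdgpu.uniform metadata.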
bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

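// Select G_LOAD as a flat load. Only 32- and 64-bit loads through 64-bit
// pointers are currently handled here; the generated selector is tried first
// (see select()).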
bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();
  Register DstReg = I.getOperand(0).getReg();
  Register PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  if (MRI.getType(PtrReg).getSizeInBits() == 32) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  switch (LoadSize) {
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Unhandled load size\n");
    return false;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(0))
          .addReg(PtrReg)
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

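// Select G_BRCOND into S_CBRANCH_SCC1 or S_CBRANCH_VCCNZ depending on whether
// the condition is an scc bit or a vcc lane mask.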
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know, based on the register bank, that a VCC producer ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}

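// Select G_FRAME_INDEX as a plain scalar or vector move of the frame index.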
bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
}

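// Top-level entry point: dispatch a generic instruction to the hand-written
// selectors above, falling back to the TableGen'erated selectImpl.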
bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {
  if (I.isPHI())
    return selectPHI(I);

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_ADD:
    if (selectG_ADD(I))
      return true;
    LLVM_FALLTHROUGH;
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_LOAD:
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    if (selectG_SZA_EXT(I)) {
      I.eraseFromParent();
      return true;
    }

    return false;
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

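// Fold G_FNEG/G_FABS feeding a VOP3 source into SISrcMods flags, returning
// the underlying source register and the accumulated modifiers.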
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(
  Register Src, const MachineRegisterInfo &MRI) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI.getVRegDef(Src);

  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = MRI.getVRegDef(Src);
  }

  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  return std::make_pair(Src, Mods);
}

/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

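// Complex patterns for SMRD loads: match a single SGPR base plus an offset
// that fits the immediate field, its 32-bit variant, or an offset
// materialized into an SGPR.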
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using a sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}