blob: cc9289fa82edc1609f105e0e539f9eb5dea103f1 [file] [log] [blame]
Tom Stellardca166212017-01-30 21:56:46 +00001//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tom Stellardca166212017-01-30 21:56:46 +00006//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AMDGPU.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUInstructionSelector.h"
15#include "AMDGPUInstrInfo.h"
16#include "AMDGPURegisterBankInfo.h"
17#include "AMDGPURegisterInfo.h"
18#include "AMDGPUSubtarget.h"
Tom Stellard1dc90202018-05-10 20:53:06 +000019#include "AMDGPUTargetMachine.h"
Matt Arsenaultb1cc4f52018-06-25 16:17:48 +000020#include "SIMachineFunctionInfo.h"
Tom Stellard44b30b42018-05-22 02:03:23 +000021#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Tom Stellard1dc90202018-05-10 20:53:06 +000022#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
23#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
Aditya Nandakumar18b3f9d2018-01-17 19:31:33 +000024#include "llvm/CodeGen/GlobalISel/Utils.h"
Tom Stellardca166212017-01-30 21:56:46 +000025#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineFunction.h"
27#include "llvm/CodeGen/MachineInstr.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/IR/Type.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/raw_ostream.h"
33
34#define DEBUG_TYPE "amdgpu-isel"
35
36using namespace llvm;
37
Tom Stellard1dc90202018-05-10 20:53:06 +000038#define GET_GLOBALISEL_IMPL
Tom Stellard5bfbae52018-07-11 20:59:01 +000039#define AMDGPUSubtarget GCNSubtarget
Tom Stellard1dc90202018-05-10 20:53:06 +000040#include "AMDGPUGenGlobalISel.inc"
41#undef GET_GLOBALISEL_IMPL
Tom Stellard5bfbae52018-07-11 20:59:01 +000042#undef AMDGPUSubtarget
Tom Stellard1dc90202018-05-10 20:53:06 +000043
// Construct the selector for a GCN subtarget.
//
// The GET_GLOBALISEL_PREDICATES_INIT / GET_GLOBALISEL_TEMPORARIES_INIT
// fragments of the generated AMDGPUGenGlobalISel.inc expand to member
// initializers, so the #includes must appear *inside* the constructor's
// initializer list.  Do not reorder or "clean up" these includes.
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
59
60const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
Tom Stellardca166212017-01-30 21:56:46 +000061
Matt Arsenault2ab25f92019-07-01 16:06:02 +000062static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
63 if (TargetRegisterInfo::isPhysicalRegister(Reg))
64 return Reg == AMDGPU::SCC;
Tom Stellard8b1c53b2019-06-17 16:27:43 +000065
66 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
67 const TargetRegisterClass *RC =
68 RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
Matt Arsenault1daad912019-07-01 15:23:04 +000069 if (RC) {
70 if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
71 return false;
72 const LLT Ty = MRI.getType(Reg);
73 return Ty.isValid() && Ty.getSizeInBits() == 1;
74 }
Tom Stellard8b1c53b2019-06-17 16:27:43 +000075
76 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
77 return RB->getID() == AMDGPU::SCCRegBankID;
78}
79
Matt Arsenault2ab25f92019-07-01 16:06:02 +000080bool AMDGPUInstructionSelector::isVCC(Register Reg,
81 const MachineRegisterInfo &MRI) const {
82 if (TargetRegisterInfo::isPhysicalRegister(Reg))
83 return Reg == TRI.getVCC();
Matt Arsenault9f992c22019-07-01 13:22:07 +000084
85 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
86 const TargetRegisterClass *RC =
87 RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
88 if (RC) {
Matt Arsenault2ab25f92019-07-01 16:06:02 +000089 return RC->hasSuperClassEq(TRI.getBoolRC()) &&
Matt Arsenault9f992c22019-07-01 13:22:07 +000090 MRI.getType(Reg).getSizeInBits() == 1;
91 }
92
93 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
94 return RB->getID() == AMDGPU::VCCRegBankID;
95}
96
// Select a generic COPY, constraining its virtual register operands to
// concrete register classes.  A copy from the scc bank into a vcc-bank
// register cannot remain a plain COPY (scc is modeled as a 32-bit sgpr),
// so that case is lowered to a compare producing the wave mask.
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  // Special case for COPY from the scc register bank. The scc register bank
  // is modeled using 32-bit sgprs.
  const MachineOperand &Src = I.getOperand(1);
  unsigned SrcReg = Src.getReg();
  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
    unsigned DstReg = I.getOperand(0).getReg();

    // Specially handle scc->vcc copies.
    if (isVCC(DstReg, MRI)) {
      const DebugLoc &DL = I.getDebugLoc();
      // The 1-bit value lives in a 32-bit sgpr; compare it against 0 to
      // materialize the wave-wide condition mask.
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);
      // The source may still be generic; pin it to a class so the new
      // compare verifies.
      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }
  }

  // Ordinary copy: constrain each virtual operand to the class implied by
  // its bank/type; operands with no constrainable class are left alone.
  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}
135
Matt Arsenaulte1006252019-07-01 16:32:47 +0000136bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
137 MachineBasicBlock *BB = I.getParent();
138 MachineFunction *MF = BB->getParent();
139 MachineRegisterInfo &MRI = MF->getRegInfo();
140
141 const Register DefReg = I.getOperand(0).getReg();
142 const LLT DefTy = MRI.getType(DefReg);
143
144 // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)
145
146 const RegClassOrRegBank &RegClassOrBank =
147 MRI.getRegClassOrRegBank(DefReg);
148
149 const TargetRegisterClass *DefRC
150 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
151 if (!DefRC) {
152 if (!DefTy.isValid()) {
153 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
154 return false;
155 }
156
157 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
158 if (RB.getID() == AMDGPU::SCCRegBankID) {
159 LLVM_DEBUG(dbgs() << "illegal scc phi\n");
160 return false;
161 }
162
163 DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
164 if (!DefRC) {
165 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
166 return false;
167 }
168 }
169
170 I.setDesc(TII.get(TargetOpcode::PHI));
171 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
172}
173
// Produce the 32-bit half of 64-bit operand \p MO selected by \p SubIdx
// (AMDGPU::sub0 = low half, AMDGPU::sub1 = high half).  Register operands
// are copied through a fresh register of class \p SubRC; immediate operands
// are split arithmetically.  The returned operand preserves the original's
// flags.
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    // Compose with any subregister index already present on the operand.
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  // Widen to 64 bits, then slice out the requested half with sign
  // extension so inline-immediate checks still behave.
  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}
210
Tom Stellard390a5f42018-07-13 21:05:14 +0000211static int64_t getConstant(const MachineInstr *MI) {
212 return MI->getOperand(1).getCImm()->getSExtValue();
213}
214
// Select G_ADD for 32-bit and 64-bit values on either the SALU or VALU,
// based on the destination's register bank.  64-bit adds are decomposed
// into low/high 32-bit halves chained through the carry bit, then
// recombined with a REG_SEQUENCE.
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;

  if (Size == 32) {
    if (IsSALU) {
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      // V_ADD_U32_e64 additionally needs the clamp immediate and an
      // implicit exec read appended in place.
      I.setDesc(TII.get(AMDGPU::V_ADD_U32_e64));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    // No add-without-carry available: use the carry-writing form and mark
    // the carry-out dead.
    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  // 64-bit path: pick the pair/half register classes matching the bank.
  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  // getSubOperand64 handles both register and immediate sources.
  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    // Scalar path chains the carry implicitly through SCC.
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    // Vector path chains the carry through an explicit wave-mask register.
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}
305
Tom Stellard41f32192019-02-28 23:37:48 +0000306bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
307 MachineBasicBlock *BB = I.getParent();
308 MachineFunction *MF = BB->getParent();
309 MachineRegisterInfo &MRI = MF->getRegInfo();
310 assert(I.getOperand(2).getImm() % 32 == 0);
311 unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
312 const DebugLoc &DL = I.getDebugLoc();
313 MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
314 I.getOperand(0).getReg())
315 .addReg(I.getOperand(1).getReg(), 0, SubReg);
316
317 for (const MachineOperand &MO : Copy->operands()) {
318 const TargetRegisterClass *RC =
319 TRI.getConstrainedRegClassForOperand(MO, MRI);
320 if (!RC)
321 continue;
322 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
323 }
324 I.eraseFromParent();
325 return true;
326}
327
Matt Arsenault9b7ffc42019-07-09 14:02:20 +0000328bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
329 MachineBasicBlock *BB = MI.getParent();
330 MachineFunction *MF = BB->getParent();
331 MachineRegisterInfo &MRI = MF->getRegInfo();
332 Register DstReg = MI.getOperand(0).getReg();
333 LLT DstTy = MRI.getType(DstReg);
334 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
335
336 const unsigned SrcSize = SrcTy.getSizeInBits();
337 const DebugLoc &DL = MI.getDebugLoc();
338 const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
339 const unsigned DstSize = DstTy.getSizeInBits();
340 const TargetRegisterClass *DstRC =
341 TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
342 if (!DstRC)
343 return false;
344
345 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
346 MachineInstrBuilder MIB =
347 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
348 for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
349 MachineOperand &Src = MI.getOperand(I + 1);
350 MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
351 MIB.addImm(SubRegs[I]);
352
353 const TargetRegisterClass *SrcRC
354 = TRI.getConstrainedRegClassForOperand(Src, MRI);
355 if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
356 return false;
357 }
358
359 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
360 return false;
361
362 MI.eraseFromParent();
363 return true;
364}
365
// Lower G_UNMERGE_VALUES into one subregister COPY per destination.
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  // Destinations are operands [0, NumDst); the source is the last operand.
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg0);
  LLT SrcTy = MRI.getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC =
    TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}
409
// Pointer arithmetic is selected exactly like integer addition.
bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}
413
Tom Stellard3f1c6fe2018-06-21 23:38:20 +0000414bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
415 MachineBasicBlock *BB = I.getParent();
416 MachineFunction *MF = BB->getParent();
417 MachineRegisterInfo &MRI = MF->getRegInfo();
418 const MachineOperand &MO = I.getOperand(0);
Matt Arsenaultf8a841b2019-06-24 16:24:03 +0000419
420 // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
421 // regbank check here is to know why getConstrainedRegClassForOperand failed.
422 const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
423 if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
424 (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
425 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
426 return true;
427 }
428
429 return false;
Tom Stellard3f1c6fe2018-06-21 23:38:20 +0000430}
431
Tom Stellard33634d1b2019-03-01 00:50:26 +0000432bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
433 MachineBasicBlock *BB = I.getParent();
434 MachineFunction *MF = BB->getParent();
435 MachineRegisterInfo &MRI = MF->getRegInfo();
436 unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
437 DebugLoc DL = I.getDebugLoc();
438 MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
439 .addDef(I.getOperand(0).getReg())
440 .addReg(I.getOperand(1).getReg())
441 .addReg(I.getOperand(2).getReg())
442 .addImm(SubReg);
443
444 for (const MachineOperand &MO : Ins->operands()) {
445 if (!MO.isReg())
446 continue;
447 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
448 continue;
449
450 const TargetRegisterClass *RC =
451 TRI.getConstrainedRegClassForOperand(MO, MRI);
452 if (!RC)
453 continue;
454 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
455 }
456 I.eraseFromParent();
457 return true;
458}
459
Matt Arsenault50be3482019-07-02 14:52:16 +0000460bool AMDGPUInstructionSelector::selectG_INTRINSIC(
461 MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
Matt Arsenaultfee19492019-06-17 17:01:27 +0000462 unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
Tom Stellarda9284732018-06-14 19:26:37 +0000463 switch (IntrinsicID) {
Tom Stellardac684712018-07-13 22:16:03 +0000464 case Intrinsic::maxnum:
465 case Intrinsic::minnum:
Tom Stellarda9284732018-06-14 19:26:37 +0000466 case Intrinsic::amdgcn_cvt_pkrtz:
467 return selectImpl(I, CoverageInfo);
Matt Arsenault50be3482019-07-02 14:52:16 +0000468 default:
469 return selectImpl(I, CoverageInfo);
Tom Stellarda9284732018-06-14 19:26:37 +0000470 }
Tom Stellarda9284732018-06-14 19:26:37 +0000471}
472
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000473static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
474 if (Size != 32 && Size != 64)
475 return -1;
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000476 switch (P) {
477 default:
478 llvm_unreachable("Unknown condition code!");
479 case CmpInst::ICMP_NE:
480 return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
481 case CmpInst::ICMP_EQ:
482 return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
483 case CmpInst::ICMP_SGT:
484 return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
485 case CmpInst::ICMP_SGE:
486 return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
487 case CmpInst::ICMP_SLT:
488 return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
489 case CmpInst::ICMP_SLE:
490 return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
491 case CmpInst::ICMP_UGT:
492 return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
493 case CmpInst::ICMP_UGE:
494 return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
495 case CmpInst::ICMP_ULT:
496 return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
497 case CmpInst::ICMP_ULE:
498 return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
499 }
500}
501
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000502int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
503 unsigned Size) const {
504 if (Size == 64) {
505 if (!STI.hasScalarCompareEq64())
506 return -1;
507
508 switch (P) {
509 case CmpInst::ICMP_NE:
510 return AMDGPU::S_CMP_LG_U64;
511 case CmpInst::ICMP_EQ:
512 return AMDGPU::S_CMP_EQ_U64;
513 default:
514 return -1;
515 }
516 }
517
518 if (Size != 32)
519 return -1;
520
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000521 switch (P) {
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000522 case CmpInst::ICMP_NE:
523 return AMDGPU::S_CMP_LG_U32;
524 case CmpInst::ICMP_EQ:
525 return AMDGPU::S_CMP_EQ_U32;
526 case CmpInst::ICMP_SGT:
527 return AMDGPU::S_CMP_GT_I32;
528 case CmpInst::ICMP_SGE:
529 return AMDGPU::S_CMP_GE_I32;
530 case CmpInst::ICMP_SLT:
531 return AMDGPU::S_CMP_LT_I32;
532 case CmpInst::ICMP_SLE:
533 return AMDGPU::S_CMP_LE_I32;
534 case CmpInst::ICMP_UGT:
535 return AMDGPU::S_CMP_GT_U32;
536 case CmpInst::ICMP_UGE:
537 return AMDGPU::S_CMP_GE_U32;
538 case CmpInst::ICMP_ULT:
539 return AMDGPU::S_CMP_LT_U32;
540 case CmpInst::ICMP_ULE:
541 return AMDGPU::S_CMP_LE_U32;
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000542 default:
543 llvm_unreachable("Unknown condition code!");
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000544 }
545}
546
// Select G_ICMP.  Results on the scc bank use the scalar S_CMP_* family
// (which writes the physical SCC, then copied into the virtual result);
// all other results use the VALU V_CMP_*_e64 family producing a 64-bit
// wave mask.
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    // S_CMP_* defines SCC implicitly; copy it into the 32-bit result.
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  // The V_CMP result is a wave mask living in a 64-bit SGPR pair.
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               AMDGPU::SReg_64RegClass, MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
589
// Build an export instruction before \p Insert.  \p Done selects the
// EXP_DONE opcode (final export) instead of EXP; the remaining arguments
// become the instruction's operands in order: target, four source
// registers, vm flag, compr flag, and the enabled-channel mask.
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}
607
// Select intrinsics with side effects.  The export intrinsics are handled
// by hand; everything else falls through to the TableGen'erated selector.
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    // The immediate-valued arguments (tgt, en, done, vm) arrive as virtual
    // registers whose defining instructions hold the constants.
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    // Compressed export carries only two data registers; the other two EXP
    // source operands are filled with a fresh IMPLICIT_DEF value.
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  default:
    return selectImpl(I, CoverageInfo);
  }
}
652
// Select G_SELECT.  An scc-bank condition uses S_CSELECT_B32/B64 (reading
// the physical SCC); otherwise a 32-bit V_CNDMASK_B32 is used, with wider
// VGPR selects expected to have been split during RegBankSelect.
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    // S_CSELECT reads the physical SCC; copy the condition into it first.
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc register
    // bank, because it does not cover the register class that we used to represent
    // for it. So we need to manually set the register class here.
    if (!MRI.getRegClassOrNull(CCReg))
        MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    // Non-short-circuiting '|' so both instructions are always constrained.
    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .add(I.getOperand(3))
        .addImm(0)
        .add(I.getOperand(2))
        .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
701
// Select G_STORE as a FLAT store sized by the value being stored.  Only
// 64-bit (flat-compatible) pointers are handled here.
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
  if (PtrSize != 64) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  // FLAT stores take the address operand first, then the data.
  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0) // offset
          .addImm(0) // glc
          .addImm(0) // slc
          .addImm(0); // dlc


  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}
750
Matt Arsenaultdbb6c032019-06-24 18:02:18 +0000751static int sizeToSubRegIndex(unsigned Size) {
752 switch (Size) {
753 case 32:
754 return AMDGPU::sub0;
755 case 64:
756 return AMDGPU::sub0_sub1;
757 case 96:
758 return AMDGPU::sub0_sub1_sub2;
759 case 128:
760 return AMDGPU::sub0_sub1_sub2_sub3;
761 case 256:
762 return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
763 default:
764 if (Size < 32)
765 return AMDGPU::sub0;
766 if (Size > 256)
767 return -1;
768 return sizeToSubRegIndex(PowerOf2Ceil(Size));
769 }
770}
771
772bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
773 MachineBasicBlock *BB = I.getParent();
774 MachineFunction *MF = BB->getParent();
775 MachineRegisterInfo &MRI = MF->getRegInfo();
776
777 unsigned DstReg = I.getOperand(0).getReg();
778 unsigned SrcReg = I.getOperand(1).getReg();
779 const LLT DstTy = MRI.getType(DstReg);
780 const LLT SrcTy = MRI.getType(SrcReg);
781 if (!DstTy.isScalar())
782 return false;
783
784 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
785 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
786 if (SrcRB != DstRB)
787 return false;
788
789 unsigned DstSize = DstTy.getSizeInBits();
790 unsigned SrcSize = SrcTy.getSizeInBits();
791
792 const TargetRegisterClass *SrcRC
793 = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
794 const TargetRegisterClass *DstRC
795 = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);
796
797 if (SrcSize > 32) {
798 int SubRegIdx = sizeToSubRegIndex(DstSize);
799 if (SubRegIdx == -1)
800 return false;
801
802 // Deal with weird cases where the class only partially supports the subreg
803 // index.
804 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
805 if (!SrcRC)
806 return false;
807
808 I.getOperand(1).setSubReg(SubRegIdx);
809 }
810
811 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
812 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
813 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
814 return false;
815 }
816
817 I.setDesc(TII.get(TargetOpcode::COPY));
818 return true;
819}
820
Matt Arsenault5dafcb92019-07-01 13:22:06 +0000821/// \returns true if a bitmask for \p Size bits will be an inline immediate.
822static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
823 Mask = maskTrailingOnes<unsigned>(Size);
824 int SignedMask = static_cast<int>(Mask);
825 return SignedMask >= -16 && SignedMask <= 64;
826}
827
Matt Arsenaultd7ffa2a2019-06-25 13:18:11 +0000828bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
829 bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
830 const DebugLoc &DL = I.getDebugLoc();
831 MachineBasicBlock &MBB = *I.getParent();
832 MachineFunction &MF = *MBB.getParent();
833 MachineRegisterInfo &MRI = MF.getRegInfo();
834 const unsigned DstReg = I.getOperand(0).getReg();
835 const unsigned SrcReg = I.getOperand(1).getReg();
836
837 const LLT DstTy = MRI.getType(DstReg);
838 const LLT SrcTy = MRI.getType(SrcReg);
839 const LLT S1 = LLT::scalar(1);
840 const unsigned SrcSize = SrcTy.getSizeInBits();
841 const unsigned DstSize = DstTy.getSizeInBits();
842 if (!DstTy.isScalar())
843 return false;
844
845 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);
846
847 if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
848 if (SrcTy != S1 || DstSize > 64) // Invalid
849 return false;
850
851 unsigned Opcode =
852 DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
853 const TargetRegisterClass *DstRC =
854 DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;
855
856 // FIXME: Create an extra copy to avoid incorrectly constraining the result
857 // of the scc producer.
858 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
859 BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
860 .addReg(SrcReg);
861 BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
862 .addReg(TmpReg);
863
864 // The instruction operands are backwards from what you would expect.
865 BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
866 .addImm(0)
867 .addImm(Signed ? -1 : 1);
868 return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
869 }
870
871 if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
872 if (SrcTy != S1) // Invalid
873 return false;
874
875 MachineInstr *ExtI =
876 BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
877 .addImm(0) // src0_modifiers
878 .addImm(0) // src0
879 .addImm(0) // src1_modifiers
880 .addImm(Signed ? -1 : 1) // src1
881 .addUse(SrcReg);
882 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
883 }
884
885 if (I.getOpcode() == AMDGPU::G_ANYEXT)
886 return selectCOPY(I);
887
888 if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
889 // 64-bit should have been split up in RegBankSelect
Matt Arsenault5dafcb92019-07-01 13:22:06 +0000890
891 // Try to use an and with a mask if it will save code size.
892 unsigned Mask;
893 if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
894 MachineInstr *ExtI =
895 BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
896 .addImm(Mask)
897 .addReg(SrcReg);
898 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
899 }
900
Matt Arsenaultd7ffa2a2019-06-25 13:18:11 +0000901 const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
902 MachineInstr *ExtI =
903 BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
904 .addReg(SrcReg)
905 .addImm(0) // Offset
906 .addImm(SrcSize); // Width
907 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
908 }
909
910 if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
911 if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
912 return false;
913
914 if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
915 const unsigned SextOpc = SrcSize == 8 ?
916 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
917 BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
918 .addReg(SrcReg);
919 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
920 }
921
922 const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
923 const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
924
925 // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width.
926 if (DstSize > 32 && SrcSize <= 32) {
927 // We need a 64-bit register source, but the high bits don't matter.
928 unsigned ExtReg
929 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
930 unsigned UndefReg
931 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
932 BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
933 BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
934 .addReg(SrcReg)
935 .addImm(AMDGPU::sub0)
936 .addReg(UndefReg)
937 .addImm(AMDGPU::sub1);
938
939 BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
940 .addReg(ExtReg)
941 .addImm(SrcSize << 16);
942
943 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
944 }
945
Matt Arsenault5dafcb92019-07-01 13:22:06 +0000946 unsigned Mask;
947 if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
948 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
949 .addReg(SrcReg)
950 .addImm(Mask);
951 } else {
952 BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
953 .addReg(SrcReg)
954 .addImm(SrcSize << 16);
955 }
956
Matt Arsenaultd7ffa2a2019-06-25 13:18:11 +0000957 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
958 }
959
960 return false;
961}
962
Tom Stellardca166212017-01-30 21:56:46 +0000963bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
964 MachineBasicBlock *BB = I.getParent();
965 MachineFunction *MF = BB->getParent();
966 MachineRegisterInfo &MRI = MF->getRegInfo();
Tom Stellarde182b282018-05-15 17:57:09 +0000967 MachineOperand &ImmOp = I.getOperand(1);
Tom Stellardca166212017-01-30 21:56:46 +0000968
Tom Stellarde182b282018-05-15 17:57:09 +0000969 // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
970 if (ImmOp.isFPImm()) {
971 const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
972 ImmOp.ChangeToImmediate(Imm.getZExtValue());
973 } else if (ImmOp.isCImm()) {
974 ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
975 }
976
977 unsigned DstReg = I.getOperand(0).getReg();
978 unsigned Size;
979 bool IsSgpr;
980 const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
981 if (RB) {
982 IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
983 Size = MRI.getType(DstReg).getSizeInBits();
984 } else {
985 const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
986 IsSgpr = TRI.isSGPRClass(RC);
Tom Stellarda91ce172018-05-21 17:49:31 +0000987 Size = TRI.getRegSizeInBits(*RC);
Tom Stellarde182b282018-05-15 17:57:09 +0000988 }
989
990 if (Size != 32 && Size != 64)
991 return false;
992
993 unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
Tom Stellardca166212017-01-30 21:56:46 +0000994 if (Size == 32) {
Tom Stellarde182b282018-05-15 17:57:09 +0000995 I.setDesc(TII.get(Opcode));
996 I.addImplicitDefUseOperands(*MF);
Tom Stellardca166212017-01-30 21:56:46 +0000997 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
998 }
999
Tom Stellardca166212017-01-30 21:56:46 +00001000 DebugLoc DL = I.getDebugLoc();
Tom Stellarde182b282018-05-15 17:57:09 +00001001 const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
1002 &AMDGPU::VGPR_32RegClass;
1003 unsigned LoReg = MRI.createVirtualRegister(RC);
1004 unsigned HiReg = MRI.createVirtualRegister(RC);
1005 const APInt &Imm = APInt(Size, I.getOperand(1).getImm());
Tom Stellardca166212017-01-30 21:56:46 +00001006
Tom Stellarde182b282018-05-15 17:57:09 +00001007 BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
Tom Stellardca166212017-01-30 21:56:46 +00001008 .addImm(Imm.trunc(32).getZExtValue());
1009
Tom Stellarde182b282018-05-15 17:57:09 +00001010 BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
Tom Stellardca166212017-01-30 21:56:46 +00001011 .addImm(Imm.ashr(32).getZExtValue());
1012
Tom Stellarde182b282018-05-15 17:57:09 +00001013 const MachineInstr *RS =
1014 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
1015 .addReg(LoReg)
1016 .addImm(AMDGPU::sub0)
1017 .addReg(HiReg)
1018 .addImm(AMDGPU::sub1);
1019
Tom Stellardca166212017-01-30 21:56:46 +00001020 // We can't call constrainSelectedInstRegOperands here, because it doesn't
1021 // work for target independent opcodes
1022 I.eraseFromParent();
Tom Stellarde182b282018-05-15 17:57:09 +00001023 const TargetRegisterClass *DstRC =
1024 TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
1025 if (!DstRC)
1026 return true;
1027 return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
Tom Stellardca166212017-01-30 21:56:46 +00001028}
1029
1030static bool isConstant(const MachineInstr &MI) {
1031 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
1032}
1033
// Walk the address operand of \p Load through its chain of G_GEPs,
// collecting for each GEP its constant immediate part and its SGPR/VGPR
// register parts into \p AddrInfo. Recurses through the base pointer, so
// outer GEPs appear before inner ones.
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  // Stop when the pointer is not produced by a G_GEP.
  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  // Operands 1 and 2 of a G_GEP are the base pointer and the offset.
  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    // Non-constant parts are bucketed by register bank.
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}
1067
Tom Stellard79b5c382019-02-20 21:02:37 +00001068bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
Tom Stellardca166212017-01-30 21:56:46 +00001069 if (!MI.hasOneMemOperand())
1070 return false;
1071
1072 const MachineMemOperand *MMO = *MI.memoperands_begin();
1073 const Value *Ptr = MMO->getValue();
1074
1075 // UndefValue means this is a load of a kernel input. These are uniform.
1076 // Sometimes LDS instructions have constant pointers.
1077 // If Ptr is null, then that means this mem operand contains a
1078 // PseudoSourceValue like GOT.
1079 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
1080 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
1081 return true;
1082
Matt Arsenault923712b2018-02-09 16:57:57 +00001083 if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
1084 return true;
1085
Tom Stellardca166212017-01-30 21:56:46 +00001086 const Instruction *I = dyn_cast<Instruction>(Ptr);
1087 return I && I->getMetadata("amdgpu.uniform");
1088}
1089
Tom Stellardca166212017-01-30 21:56:46 +00001090bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
1091 for (const GEPInfo &GEPInfo : AddrInfo) {
1092 if (!GEPInfo.VgprParts.empty())
1093 return true;
1094 }
1095 return false;
1096}
1097
Tom Stellardca166212017-01-30 21:56:46 +00001098bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
1099 MachineBasicBlock *BB = I.getParent();
1100 MachineFunction *MF = BB->getParent();
1101 MachineRegisterInfo &MRI = MF->getRegInfo();
Matt Arsenaulta3107272019-07-01 16:36:39 +00001102 const DebugLoc &DL = I.getDebugLoc();
1103 Register DstReg = I.getOperand(0).getReg();
1104 Register PtrReg = I.getOperand(1).getReg();
Tom Stellardca166212017-01-30 21:56:46 +00001105 unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
1106 unsigned Opcode;
1107
Matt Arsenaulta3107272019-07-01 16:36:39 +00001108 if (MRI.getType(I.getOperand(1).getReg()).getSizeInBits() == 32) {
1109 LLVM_DEBUG(dbgs() << "Unhandled address space\n");
1110 return false;
1111 }
1112
Tom Stellardca166212017-01-30 21:56:46 +00001113 SmallVector<GEPInfo, 4> AddrInfo;
1114
1115 getAddrModeInfo(I, MRI, AddrInfo);
1116
Tom Stellardca166212017-01-30 21:56:46 +00001117 switch (LoadSize) {
Tom Stellardca166212017-01-30 21:56:46 +00001118 case 32:
1119 Opcode = AMDGPU::FLAT_LOAD_DWORD;
1120 break;
1121 case 64:
1122 Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
1123 break;
Matt Arsenaulta3107272019-07-01 16:36:39 +00001124 default:
1125 LLVM_DEBUG(dbgs() << "Unhandled load size\n");
1126 return false;
Tom Stellardca166212017-01-30 21:56:46 +00001127 }
1128
1129 MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
1130 .add(I.getOperand(0))
1131 .addReg(PtrReg)
Matt Arsenaultfd023142017-06-12 15:55:58 +00001132 .addImm(0) // offset
1133 .addImm(0) // glc
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001134 .addImm(0) // slc
1135 .addImm(0); // dlc
Tom Stellardca166212017-01-30 21:56:46 +00001136
1137 bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
1138 I.eraseFromParent();
1139 return Ret;
1140}
1141
Matt Arsenault64642802019-07-01 15:39:27 +00001142bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
1143 MachineBasicBlock *BB = I.getParent();
1144 MachineFunction *MF = BB->getParent();
1145 MachineRegisterInfo &MRI = MF->getRegInfo();
1146 MachineOperand &CondOp = I.getOperand(0);
1147 Register CondReg = CondOp.getReg();
1148 const DebugLoc &DL = I.getDebugLoc();
1149
Matt Arsenault2ab25f92019-07-01 16:06:02 +00001150 unsigned BrOpcode;
1151 Register CondPhysReg;
1152 const TargetRegisterClass *ConstrainRC;
1153
1154 // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
1155 // whether the branch is uniform when selecting the instruction. In
1156 // GlobalISel, we should push that decision into RegBankSelect. Assume for now
1157 // RegBankSelect knows what it's doing if the branch condition is scc, even
1158 // though it currently does not.
Matt Arsenault64642802019-07-01 15:39:27 +00001159 if (isSCC(CondReg, MRI)) {
Matt Arsenault2ab25f92019-07-01 16:06:02 +00001160 CondPhysReg = AMDGPU::SCC;
1161 BrOpcode = AMDGPU::S_CBRANCH_SCC1;
1162 ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
1163 } else if (isVCC(CondReg, MRI)) {
1164 // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
1165 // We sort of know that a VCC producer based on the register bank, that ands
1166 // inactive lanes with 0. What if there was a logical operation with vcc
1167 // producers in different blocks/with different exec masks?
1168 // FIXME: Should scc->vcc copies and with exec?
1169 CondPhysReg = TRI.getVCC();
1170 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
1171 ConstrainRC = TRI.getBoolRC();
1172 } else
1173 return false;
Matt Arsenault64642802019-07-01 15:39:27 +00001174
Matt Arsenault2ab25f92019-07-01 16:06:02 +00001175 if (!MRI.getRegClassOrNull(CondReg))
1176 MRI.setRegClass(CondReg, ConstrainRC);
Matt Arsenault64642802019-07-01 15:39:27 +00001177
Matt Arsenault2ab25f92019-07-01 16:06:02 +00001178 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
1179 .addReg(CondReg);
1180 BuildMI(*BB, &I, DL, TII.get(BrOpcode))
1181 .addMBB(I.getOperand(1).getMBB());
1182
1183 I.eraseFromParent();
1184 return true;
Matt Arsenault64642802019-07-01 15:39:27 +00001185}
1186
Matt Arsenaultcda82f02019-07-01 15:48:18 +00001187bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
1188 MachineBasicBlock *BB = I.getParent();
1189 MachineFunction *MF = BB->getParent();
1190 MachineRegisterInfo &MRI = MF->getRegInfo();
1191
1192 Register DstReg = I.getOperand(0).getReg();
1193 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
1194 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
1195 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
1196 if (IsVGPR)
1197 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
1198
1199 return RBI.constrainGenericRegister(
1200 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
1201}
1202
Daniel Sandersf76f3152017-11-16 00:46:35 +00001203bool AMDGPUInstructionSelector::select(MachineInstr &I,
1204 CodeGenCoverage &CoverageInfo) const {
Matt Arsenaulte1006252019-07-01 16:32:47 +00001205 if (I.isPHI())
1206 return selectPHI(I);
Tom Stellardca166212017-01-30 21:56:46 +00001207
Tom Stellard7712ee82018-06-22 00:44:29 +00001208 if (!isPreISelGenericOpcode(I.getOpcode())) {
1209 if (I.isCopy())
1210 return selectCOPY(I);
Tom Stellardca166212017-01-30 21:56:46 +00001211 return true;
Tom Stellard7712ee82018-06-22 00:44:29 +00001212 }
Tom Stellardca166212017-01-30 21:56:46 +00001213
1214 switch (I.getOpcode()) {
Tom Stellard9e9dd302019-07-01 16:09:33 +00001215 case TargetOpcode::G_ADD:
1216 if (selectG_ADD(I))
1217 return true;
1218 LLVM_FALLTHROUGH;
Tom Stellardca166212017-01-30 21:56:46 +00001219 default:
Tom Stellard1dc90202018-05-10 20:53:06 +00001220 return selectImpl(I, CoverageInfo);
Tom Stellard7c650782018-10-05 04:34:09 +00001221 case TargetOpcode::G_INTTOPTR:
Tom Stellard1e0edad2018-05-10 21:20:10 +00001222 case TargetOpcode::G_BITCAST:
1223 return selectCOPY(I);
Tom Stellardca166212017-01-30 21:56:46 +00001224 case TargetOpcode::G_CONSTANT:
Tom Stellarde182b282018-05-15 17:57:09 +00001225 case TargetOpcode::G_FCONSTANT:
Tom Stellardca166212017-01-30 21:56:46 +00001226 return selectG_CONSTANT(I);
Tom Stellard41f32192019-02-28 23:37:48 +00001227 case TargetOpcode::G_EXTRACT:
1228 return selectG_EXTRACT(I);
Matt Arsenault9b7ffc42019-07-09 14:02:20 +00001229 case TargetOpcode::G_MERGE_VALUES:
1230 case TargetOpcode::G_CONCAT_VECTORS:
1231 return selectG_MERGE_VALUES(I);
Matt Arsenault872f38b2019-07-09 14:02:26 +00001232 case TargetOpcode::G_UNMERGE_VALUES:
1233 return selectG_UNMERGE_VALUES(I);
Tom Stellardca166212017-01-30 21:56:46 +00001234 case TargetOpcode::G_GEP:
1235 return selectG_GEP(I);
Tom Stellard3f1c6fe2018-06-21 23:38:20 +00001236 case TargetOpcode::G_IMPLICIT_DEF:
1237 return selectG_IMPLICIT_DEF(I);
Tom Stellard33634d1b2019-03-01 00:50:26 +00001238 case TargetOpcode::G_INSERT:
1239 return selectG_INSERT(I);
Tom Stellarda9284732018-06-14 19:26:37 +00001240 case TargetOpcode::G_INTRINSIC:
1241 return selectG_INTRINSIC(I, CoverageInfo);
Tom Stellard390a5f42018-07-13 21:05:14 +00001242 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
1243 return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
Tom Stellard8b1c53b2019-06-17 16:27:43 +00001244 case TargetOpcode::G_ICMP:
Matt Arsenault3b7668a2019-07-01 13:34:26 +00001245 if (selectG_ICMP(I))
1246 return true;
1247 return selectImpl(I, CoverageInfo);
Tom Stellardca166212017-01-30 21:56:46 +00001248 case TargetOpcode::G_LOAD:
Tom Stellard79b5c382019-02-20 21:02:37 +00001249 if (selectImpl(I, CoverageInfo))
1250 return true;
Tom Stellardca166212017-01-30 21:56:46 +00001251 return selectG_LOAD(I);
Tom Stellard8b1c53b2019-06-17 16:27:43 +00001252 case TargetOpcode::G_SELECT:
1253 return selectG_SELECT(I);
Tom Stellardca166212017-01-30 21:56:46 +00001254 case TargetOpcode::G_STORE:
1255 return selectG_STORE(I);
Matt Arsenaultdbb6c032019-06-24 18:02:18 +00001256 case TargetOpcode::G_TRUNC:
1257 return selectG_TRUNC(I);
Matt Arsenaultd7ffa2a2019-06-25 13:18:11 +00001258 case TargetOpcode::G_SEXT:
1259 case TargetOpcode::G_ZEXT:
1260 case TargetOpcode::G_ANYEXT:
1261 if (selectG_SZA_EXT(I)) {
1262 I.eraseFromParent();
1263 return true;
1264 }
1265
1266 return false;
Matt Arsenault64642802019-07-01 15:39:27 +00001267 case TargetOpcode::G_BRCOND:
1268 return selectG_BRCOND(I);
Matt Arsenaultcda82f02019-07-01 15:48:18 +00001269 case TargetOpcode::G_FRAME_INDEX:
1270 return selectG_FRAME_INDEX(I);
Matt Arsenaulted633992019-07-02 14:17:38 +00001271 case TargetOpcode::G_FENCE:
1272 // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
1273 // is checking for G_CONSTANT
1274 I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
1275 return true;
Tom Stellardca166212017-01-30 21:56:46 +00001276 }
1277 return false;
1278}
Tom Stellard1dc90202018-05-10 20:53:06 +00001279
Tom Stellard26fac0f2018-06-22 02:54:57 +00001280InstructionSelector::ComplexRendererFns
1281AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
1282 return {{
1283 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1284 }};
1285
1286}
1287
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001288std::pair<Register, unsigned>
1289AMDGPUInstructionSelector::selectVOP3ModsImpl(
1290 Register Src, const MachineRegisterInfo &MRI) const {
1291 unsigned Mods = 0;
1292 MachineInstr *MI = MRI.getVRegDef(Src);
1293
1294 if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
1295 Src = MI->getOperand(1).getReg();
1296 Mods |= SISrcMods::NEG;
1297 MI = MRI.getVRegDef(Src);
1298 }
1299
1300 if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
1301 Src = MI->getOperand(1).getReg();
1302 Mods |= SISrcMods::ABS;
1303 }
1304
1305 return std::make_pair(Src, Mods);
1306}
1307
Tom Stellard1dc90202018-05-10 20:53:06 +00001308///
1309/// This will select either an SGPR or VGPR operand and will save us from
1310/// having to write an extra tablegen pattern.
1311InstructionSelector::ComplexRendererFns
1312AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
1313 return {{
1314 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1315 }};
1316}
Tom Stellarddcc95e92018-05-11 05:44:16 +00001317
1318InstructionSelector::ComplexRendererFns
1319AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001320 MachineRegisterInfo &MRI
1321 = Root.getParent()->getParent()->getParent()->getRegInfo();
1322
1323 Register Src;
1324 unsigned Mods;
1325 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1326
Tom Stellarddcc95e92018-05-11 05:44:16 +00001327 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001328 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1329 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
1330 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1331 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
Tom Stellarddcc95e92018-05-11 05:44:16 +00001332 }};
1333}
Tom Stellard9a653572018-06-22 02:34:29 +00001334InstructionSelector::ComplexRendererFns
1335AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
1336 return {{
1337 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
1338 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1339 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
1340 }};
1341}
Tom Stellard46bbbc32018-06-13 22:30:47 +00001342
1343InstructionSelector::ComplexRendererFns
1344AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001345 MachineRegisterInfo &MRI
1346 = Root.getParent()->getParent()->getParent()->getRegInfo();
1347
1348 Register Src;
1349 unsigned Mods;
1350 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1351
Tom Stellard46bbbc32018-06-13 22:30:47 +00001352 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001353 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1354 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
Tom Stellard46bbbc32018-06-13 22:30:47 +00001355 }};
1356}
Tom Stellard79b5c382019-02-20 21:02:37 +00001357
1358InstructionSelector::ComplexRendererFns
1359AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
1360 MachineRegisterInfo &MRI =
1361 Root.getParent()->getParent()->getParent()->getRegInfo();
1362
1363 SmallVector<GEPInfo, 4> AddrInfo;
1364 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
1365
1366 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1367 return None;
1368
1369 const GEPInfo &GEPInfo = AddrInfo[0];
1370
1371 if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
1372 return None;
1373
1374 unsigned PtrReg = GEPInfo.SgprParts[0];
1375 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1376 return {{
1377 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1378 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1379 }};
1380}
1381
1382InstructionSelector::ComplexRendererFns
1383AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
1384 MachineRegisterInfo &MRI =
1385 Root.getParent()->getParent()->getParent()->getRegInfo();
1386
1387 SmallVector<GEPInfo, 4> AddrInfo;
1388 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
1389
1390 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1391 return None;
1392
1393 const GEPInfo &GEPInfo = AddrInfo[0];
1394 unsigned PtrReg = GEPInfo.SgprParts[0];
1395 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1396 if (!isUInt<32>(EncodedImm))
1397 return None;
1398
1399 return {{
1400 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1401 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1402 }};
1403}
1404
// Match (sgpr base + 32-bit constant offset) by materializing the offset
// into an SGPR for the _SGPR SMRD addressing form. Note this emits the
// S_MOV before the renderers are returned.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with an 32-bit immediate offset.
  // It is OK to select this using a sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}