//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

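// Return true if Reg represents the scalar condition code: the physical
// AMDGPU::SCC register, a 1-bit value in an SCC-compatible register class, or
// a virtual register assigned to the SCC register bank.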
static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

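// Select a generic COPY. Copies into a VCC destination may need to be expanded
// into a V_CMP against zero; everything else is just constrained to the
// register classes implied by its operands.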
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }

    if (!isVCC(SrcReg, MRI)) {
      // TODO: Should probably leave the copy and let copyPhysReg expand it.
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
        return false;

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);

      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }
  }

  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

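// Lower a generic PHI to a target PHI, constraining the result register to a
// class derived from its type and register bank.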
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}

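// Split a 64-bit source operand into the 32-bit half selected by SubIdx:
// register operands become a subregister copy into SubRC, and immediate
// operands are split into their low or high 32 bits.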
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
      .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

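// Select G_ADD/G_SUB. 32-bit cases map directly onto scalar or vector add/sub
// instructions; 64-bit adds are decomposed into a low add producing a carry
// and a high add consuming it, then recombined with a REG_SEQUENCE.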
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

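// Select G_EXTRACT of a 32-bit-aligned chunk as a subregister copy.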
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                         .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

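// Select G_MERGE_VALUES / G_BUILD_VECTOR / G_CONCAT_VECTORS by assembling the
// 32-bit-or-wider sources into a REG_SEQUENCE.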
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();
  if (SrcSize < 32)
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

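// Select G_UNMERGE_VALUES by copying each destination out of the corresponding
// subregister of the source.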
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg0);
  LLT SrcTy = MRI.getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                        .addDef(I.getOperand(0).getReg())
                        .addReg(I.getOperand(1).getReg())
                        .addReg(I.getOperand(2).getReg())
                        .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(
  MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();
    MachineFunction *MF = BB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
      if (!MRI.getRegClassOrNull(Reg))
        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    }

    return true;
  }
  default:
    return selectImpl(I, CoverageInfo);
  }
}

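// Map an integer predicate to the corresponding 32- or 64-bit VALU compare
// opcode, or -1 if the size is unsupported.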
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

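// Select G_ICMP either as a scalar S_CMP (when the result is an SCC value) or
// as a VALU compare writing a condition mask.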
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                           .add(I.getOperand(2))
                           .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
                         .add(I.getOperand(2))
                         .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
           .addImm(Tgt)
           .addReg(Reg0)
           .addReg(Reg1)
           .addReg(Reg2)
           .addReg(Reg3)
           .addImm(VM)
           .addImm(Compr)
           .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(),
            TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    if (!MRI.getRegClassOrNull(Reg))
      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    return true;
  }
  default:
    return selectImpl(I, CoverageInfo);
  }
}

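// Select G_SELECT as S_CSELECT when the condition is SCC, otherwise as
// V_CNDMASK_B32 on a condition mask.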
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
                              .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class we use to
    // represent it, so manually set the register class here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .add(I.getOperand(3))
        .addImm(0)
        .add(I.getOperand(2))
        .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
  if (PtrSize != 64) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                         .add(I.getOperand(1))
                         .add(I.getOperand(0))
                         .addImm(0)  // offset
                         .addImm(0)  // glc
                         .addImm(0)  // slc
                         .addImm(0); // dlc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

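// Select G_TRUNC as a (possibly subregister) COPY between register classes on
// the same bank.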
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

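// Select G_SEXT/G_ZEXT/G_ANYEXT. The strategy depends on the source bank:
// SCC and VCC sources become conditional selects, VGPR sources use BFE or an
// AND mask, and SGPR sources use S_SEXT/S_BFE/S_AND.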
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)               // src0_modifiers
        .addImm(0)               // src0
        .addImm(0)               // src1_modifiers
        .addImm(Signed ? -1 : 1) // src1
        .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
          .addImm(Mask)
          .addReg(SrcReg);
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
        .addReg(SrcReg)
        .addImm(0)        // Offset
        .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
    .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
    .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
        .addReg(LoReg)
        .addImm(AMDGPU::sub0)
        .addReg(HiReg)
        .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

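// Walk the G_GEP chain feeding a load/store and record the constant offset and
// the SGPR/VGPR base parts for addressing-mode selection.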
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();
  Register DstReg = I.getOperand(0).getReg();
  Register PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  if (MRI.getType(I.getOperand(1).getReg()).getSizeInBits() == 32) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  switch (LoadSize) {
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Unhandled load size\n");
    return false;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                         .add(I.getOperand(0))
                         .addReg(PtrReg)
                         .addImm(0)  // offset
                         .addImm(0)  // glc
                         .addImm(0)  // slc
                         .addImm(0); // dlc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

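// Select G_BRCOND as S_CBRANCH_SCC1 for scalar conditions or S_CBRANCH_VCCNZ
// for vector conditions, copying the condition into the matching physical
// register first.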
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know, based on the register bank, that a VCC producer ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
}

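// Main entry point: dispatch a generic instruction either to one of the manual
// selection routines above or to the TableGen'erated selectImpl().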
bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {
  if (I.isPHI())
    return selectPHI(I);

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    if (selectG_ADD_SUB(I))
      return true;
    LLVM_FALLTHROUGH;
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_LOAD:
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    if (selectG_SZA_EXT(I)) {
      I.eraseFromParent();
      return true;
    }

    return false;
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  case TargetOpcode::G_FENCE:
    // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
    // is checking for G_CONSTANT
    I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
    return true;
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

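// Fold G_FNEG/G_FABS feeding a VOP3 source into the source modifier bits,
// returning the underlying source register and the accumulated mods.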
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(
  Register Src, const MachineRegisterInfo &MRI) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI.getVRegDef(Src);

  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = MRI.getVRegDef(Src);
  }

  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  return std::make_pair(Src, Mods);
}

/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

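// Complex patterns for SMRD addressing: try to fold a uniform base plus a
// constant offset into the immediate or SGPR-offset addressing forms.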
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using an SGPR offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}