blob: 0d02e738b4ba54633140e49e004092421f7d7513 [file] [log] [blame]
Tom Stellardca166212017-01-30 21:56:46 +00001//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tom Stellardca166212017-01-30 21:56:46 +00006//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AMDGPU.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUInstructionSelector.h"
15#include "AMDGPUInstrInfo.h"
16#include "AMDGPURegisterBankInfo.h"
17#include "AMDGPURegisterInfo.h"
18#include "AMDGPUSubtarget.h"
Tom Stellard1dc90202018-05-10 20:53:06 +000019#include "AMDGPUTargetMachine.h"
Matt Arsenaultb1cc4f52018-06-25 16:17:48 +000020#include "SIMachineFunctionInfo.h"
Tom Stellard44b30b42018-05-22 02:03:23 +000021#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Tom Stellard1dc90202018-05-10 20:53:06 +000022#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
23#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
Aditya Nandakumar18b3f9d2018-01-17 19:31:33 +000024#include "llvm/CodeGen/GlobalISel/Utils.h"
Tom Stellardca166212017-01-30 21:56:46 +000025#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineFunction.h"
27#include "llvm/CodeGen/MachineInstr.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/IR/Type.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/raw_ostream.h"
33
34#define DEBUG_TYPE "amdgpu-isel"
35
36using namespace llvm;
37
Tom Stellard1dc90202018-05-10 20:53:06 +000038#define GET_GLOBALISEL_IMPL
Tom Stellard5bfbae52018-07-11 20:59:01 +000039#define AMDGPUSubtarget GCNSubtarget
Tom Stellard1dc90202018-05-10 20:53:06 +000040#include "AMDGPUGenGlobalISel.inc"
41#undef GET_GLOBALISEL_IMPL
Tom Stellard5bfbae52018-07-11 20:59:01 +000042#undef AMDGPUSubtarget
Tom Stellard1dc90202018-05-10 20:53:06 +000043
// Constructor. Caches the subtarget's instruction/register info and the
// register-bank info, then runs the tablegen-emitted initializers. The
// GET_GLOBALISEL_PREDICATES_INIT and GET_GLOBALISEL_TEMPORARIES_INIT
// fragments must be included inside the member-initializer list, in this
// order, because they expand to initializers for members declared by the
// corresponding DECL fragments in the header.
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
59
// Identifier for this selector in debug output; reuses the pass DEBUG_TYPE.
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
Tom Stellardca166212017-01-30 21:56:46 +000061
Matt Arsenault2ab25f92019-07-01 16:06:02 +000062static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
63 if (TargetRegisterInfo::isPhysicalRegister(Reg))
64 return Reg == AMDGPU::SCC;
Tom Stellard8b1c53b2019-06-17 16:27:43 +000065
66 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
67 const TargetRegisterClass *RC =
68 RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
Matt Arsenault1daad912019-07-01 15:23:04 +000069 if (RC) {
70 if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
71 return false;
72 const LLT Ty = MRI.getType(Reg);
73 return Ty.isValid() && Ty.getSizeInBits() == 1;
74 }
Tom Stellard8b1c53b2019-06-17 16:27:43 +000075
76 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
77 return RB->getID() == AMDGPU::SCCRegBankID;
78}
79
Matt Arsenault2ab25f92019-07-01 16:06:02 +000080bool AMDGPUInstructionSelector::isVCC(Register Reg,
81 const MachineRegisterInfo &MRI) const {
82 if (TargetRegisterInfo::isPhysicalRegister(Reg))
83 return Reg == TRI.getVCC();
Matt Arsenault9f992c22019-07-01 13:22:07 +000084
85 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
86 const TargetRegisterClass *RC =
87 RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
88 if (RC) {
Matt Arsenault18b71332019-07-15 19:44:07 +000089 const LLT Ty = MRI.getType(Reg);
Matt Arsenault2ab25f92019-07-01 16:06:02 +000090 return RC->hasSuperClassEq(TRI.getBoolRC()) &&
Matt Arsenault18b71332019-07-15 19:44:07 +000091 Ty.isValid() && Ty.getSizeInBits() == 1;
Matt Arsenault9f992c22019-07-01 13:22:07 +000092 }
93
94 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
95 return RB->getID() == AMDGPU::VCCRegBankID;
96}
97
// Select a generic COPY. Most copies just become TargetOpcode::COPY with the
// register operands constrained; copies producing a VCC-bank (per-lane
// boolean) destination need special handling, since the source may be a
// plain 1-bit value that has to be broadcast into a lane mask.
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      // SCC -> VCC: the COPY itself is fine; just constrain the destination.
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }

    // Materialize the lane mask: compare the 1-bit source against zero so
    // each active lane gets its own result bit.
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
      .addImm(0)
      .addReg(SrcReg);

    // The source may still be generic; give it a concrete class if needed.
    if (!MRI.getRegClassOrNull(SrcReg))
      MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
    I.eraseFromParent();
    return true;
  }

  // Ordinary copy: constrain every virtual register operand to the class
  // implied by its operand constraints (skip if none can be derived).
  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}
141
Matt Arsenaulte1006252019-07-01 16:32:47 +0000142bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
143 MachineBasicBlock *BB = I.getParent();
144 MachineFunction *MF = BB->getParent();
145 MachineRegisterInfo &MRI = MF->getRegInfo();
146
147 const Register DefReg = I.getOperand(0).getReg();
148 const LLT DefTy = MRI.getType(DefReg);
149
150 // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)
151
152 const RegClassOrRegBank &RegClassOrBank =
153 MRI.getRegClassOrRegBank(DefReg);
154
155 const TargetRegisterClass *DefRC
156 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
157 if (!DefRC) {
158 if (!DefTy.isValid()) {
159 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
160 return false;
161 }
162
163 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
164 if (RB.getID() == AMDGPU::SCCRegBankID) {
165 LLVM_DEBUG(dbgs() << "illegal scc phi\n");
166 return false;
167 }
168
169 DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
170 if (!DefRC) {
171 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
172 return false;
173 }
174 }
175
176 I.setDesc(TII.get(TargetOpcode::PHI));
177 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
178}
179
// Split one half out of a 64-bit operand. For a register operand, emit a COPY
// of the requested 32-bit subregister (composed with any existing subreg
// index) into a fresh register of \p SubRC and return it with the original
// operand's flags preserved. For an immediate, return the low or high 32 bits
// selected by \p SubIdx (sub0 = low, sub1 = high).
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    // Account for a subreg index already present on the operand.
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    // Carry every status flag of the original operand over to the new one.
    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}
216
Tom Stellard390a5f42018-07-13 21:05:14 +0000217static int64_t getConstant(const MachineInstr *MI) {
218 return MI->getOperand(1).getCImm()->getSExtValue();
219}
220
// Select G_ADD / G_SUB. 32-bit operations map directly onto a single scalar
// or vector add/sub (using the no-carry VALU forms when available). 64-bit
// adds are decomposed into a lo add producing a carry plus a carry-in add on
// the hi half, then recombined with REG_SEQUENCE. 64-bit subs are expected to
// have been legalized away before reaching here.
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      // Mutate the instruction in place and append the clamp immediate and
      // implicit EXEC use the _e64 no-carry form requires.
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    // Carry-writing VALU form: the carry-out def is dead.
    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  // 64-bit add: split both sources into 32-bit halves and chain the carry.
  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    // Scalar path: S_ADD_U32 sets SCC, S_ADDC_U32 consumes it implicitly.
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    // Vector path: the carry travels through an explicit wave-mask register;
    // the carry-out of the hi add is dead.
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  // Reassemble the 64-bit result from the two halves.
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);


  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}
318
Tom Stellard41f32192019-02-28 23:37:48 +0000319bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
320 MachineBasicBlock *BB = I.getParent();
321 MachineFunction *MF = BB->getParent();
322 MachineRegisterInfo &MRI = MF->getRegInfo();
323 assert(I.getOperand(2).getImm() % 32 == 0);
324 unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
325 const DebugLoc &DL = I.getDebugLoc();
326 MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
327 I.getOperand(0).getReg())
328 .addReg(I.getOperand(1).getReg(), 0, SubReg);
329
330 for (const MachineOperand &MO : Copy->operands()) {
331 const TargetRegisterClass *RC =
332 TRI.getConstrainedRegClassForOperand(MO, MRI);
333 if (!RC)
334 continue;
335 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
336 }
337 I.eraseFromParent();
338 return true;
339}
340
// Select G_MERGE_VALUES into a REG_SEQUENCE that places each source into the
// appropriate subregister of the destination. Sources narrower than 32 bits
// have no subregister to target, so they are rejected here.
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();
  if (SrcSize < 32)
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
  if (!DstRC)
    return false;

  // Subregister indices covering the destination in SrcSize-sized pieces.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
    return false;

  MI.eraseFromParent();
  return true;
}
381
// Select G_UNMERGE_VALUES into one subregister COPY per destination, reading
// successive DstSize-sized pieces of the source register.
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const int NumDst = MI.getNumOperands() - 1;

  // The single source is the last operand.
  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg0);
  LLT SrcTy = MRI.getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}
425
// G_GEP (pointer addition) selects exactly like an integer add.
bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}
429
// Select G_IMPLICIT_DEF into the target IMPLICIT_DEF. Succeeds either when
// no class can be derived AND no bank is assigned yet (nothing to constrain),
// or when a class exists and the def can be constrained to it. Note the
// second arm of the condition intentionally performs the constraint as a
// side effect.
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}
447
Tom Stellard33634d1b2019-03-01 00:50:26 +0000448bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
449 MachineBasicBlock *BB = I.getParent();
450 MachineFunction *MF = BB->getParent();
451 MachineRegisterInfo &MRI = MF->getRegInfo();
452 unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
453 DebugLoc DL = I.getDebugLoc();
454 MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
455 .addDef(I.getOperand(0).getReg())
456 .addReg(I.getOperand(1).getReg())
457 .addReg(I.getOperand(2).getReg())
458 .addImm(SubReg);
459
460 for (const MachineOperand &MO : Ins->operands()) {
461 if (!MO.isReg())
462 continue;
463 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
464 continue;
465
466 const TargetRegisterClass *RC =
467 TRI.getConstrainedRegClassForOperand(MO, MRI);
468 if (!RC)
469 continue;
470 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
471 }
472 I.eraseFromParent();
473 return true;
474}
475
// Select G_INTRINSIC (no side effects). Most intrinsics go through the
// tablegen-generated selectImpl; amdgcn_if_break is selected by hand.
bool AMDGPUInstructionSelector::selectG_INTRINSIC(
  MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();
    MachineFunction *MF = BB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    // All three values are wave masks; give any still-generic register the
    // wave mask class.
    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
      if (!MRI.getRegClassOrNull(Reg))
        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    }

    return true;
  }
  default:
    return selectImpl(I, CoverageInfo);
  }
}
513
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000514static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
515 if (Size != 32 && Size != 64)
516 return -1;
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000517 switch (P) {
518 default:
519 llvm_unreachable("Unknown condition code!");
520 case CmpInst::ICMP_NE:
521 return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
522 case CmpInst::ICMP_EQ:
523 return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
524 case CmpInst::ICMP_SGT:
525 return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
526 case CmpInst::ICMP_SGE:
527 return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
528 case CmpInst::ICMP_SLT:
529 return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
530 case CmpInst::ICMP_SLE:
531 return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
532 case CmpInst::ICMP_UGT:
533 return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
534 case CmpInst::ICMP_UGE:
535 return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
536 case CmpInst::ICMP_ULT:
537 return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
538 case CmpInst::ICMP_ULE:
539 return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
540 }
541}
542
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000543int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
544 unsigned Size) const {
545 if (Size == 64) {
546 if (!STI.hasScalarCompareEq64())
547 return -1;
548
549 switch (P) {
550 case CmpInst::ICMP_NE:
551 return AMDGPU::S_CMP_LG_U64;
552 case CmpInst::ICMP_EQ:
553 return AMDGPU::S_CMP_EQ_U64;
554 default:
555 return -1;
556 }
557 }
558
559 if (Size != 32)
560 return -1;
561
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000562 switch (P) {
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000563 case CmpInst::ICMP_NE:
564 return AMDGPU::S_CMP_LG_U32;
565 case CmpInst::ICMP_EQ:
566 return AMDGPU::S_CMP_EQ_U32;
567 case CmpInst::ICMP_SGT:
568 return AMDGPU::S_CMP_GT_I32;
569 case CmpInst::ICMP_SGE:
570 return AMDGPU::S_CMP_GE_I32;
571 case CmpInst::ICMP_SLT:
572 return AMDGPU::S_CMP_LT_I32;
573 case CmpInst::ICMP_SLE:
574 return AMDGPU::S_CMP_LE_I32;
575 case CmpInst::ICMP_UGT:
576 return AMDGPU::S_CMP_GT_U32;
577 case CmpInst::ICMP_UGE:
578 return AMDGPU::S_CMP_GE_U32;
579 case CmpInst::ICMP_ULT:
580 return AMDGPU::S_CMP_LT_U32;
581 case CmpInst::ICMP_ULE:
582 return AMDGPU::S_CMP_LE_U32;
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000583 default:
584 llvm_unreachable("Unknown condition code!");
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000585 }
586}
587
// Select G_ICMP. If the result lives in SCC, emit a scalar compare (which
// defines SCC implicitly) followed by a copy of SCC into the result register;
// otherwise emit a VALU compare writing a lane mask directly.
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    // The scalar compare writes SCC; forward it into the result register.
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  // The lane-mask result gets the wave boolean register class.
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
630
Tom Stellard390a5f42018-07-13 21:05:14 +0000631static MachineInstr *
632buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
633 unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
634 unsigned VM, bool Compr, unsigned Enabled, bool Done) {
635 const DebugLoc &DL = Insert->getDebugLoc();
636 MachineBasicBlock &BB = *Insert->getParent();
637 unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
638 return BuildMI(BB, Insert, DL, TII.get(Opcode))
639 .addImm(Tgt)
640 .addReg(Reg0)
641 .addReg(Reg1)
642 .addReg(Reg2)
643 .addReg(Reg3)
644 .addImm(VM)
645 .addImm(Compr)
646 .addImm(Enabled);
647}
648
// Select G_INTRINSIC_W_SIDE_EFFECTS. The export intrinsics are expanded by
// hand (reading their immediate-like operands from the defining G_CONSTANTs),
// amdgcn_end_cf is selected manually, and everything else falls through to
// the tablegen-generated selectImpl.
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    // Operand layout: 1 = tgt, 2 = en, 3-6 = the four sources, 7 = done,
    // 8 = vm; the scalars come from their G_CONSTANT defs.
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    // Compressed export: two real sources; the remaining two lanes are fed
    // with a fresh IMPLICIT_DEF register.
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(),
            TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    // The saved exec mask is a wave mask; assign the class if still generic.
    if (!MRI.getRegClassOrNull(Reg))
      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    return true;
  }
  default:
    return selectImpl(I, CoverageInfo);
  }
}
707
// Select G_SELECT. An SCC condition becomes a copy into SCC plus
// S_CSELECT_B32/B64; a lane-mask condition becomes V_CNDMASK_B32 (32-bit
// only — wider VGPR selects should have been split by RegBankSelect).
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc register
    // bank, because it does not cover the register class that we used to represent
    // for it.  So we need to manually set the register class here.
    if (!MRI.getRegClassOrNull(CCReg))
        MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    // Non-short-circuiting '|' so both instructions get constrained.
    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .add(I.getOperand(3))
        .addImm(0)
        .add(I.getOperand(2))
        .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
756
/// Select a G_STORE into a FLAT store instruction.
///
/// Only 64-bit (flat) pointers are handled; the store opcode width is chosen
/// from the size of the stored value (32/64/96/128 bits). Returns false when
/// the pointer or value size is unsupported so selection can fail cleanly.
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  // Operand 1 is the pointer; FLAT addressing requires a 64-bit pointer.
  unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
  if (PtrSize != 64) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  // FLAT stores take (addr, data, offset, glc, slc, dlc).
  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0) // offset
          .addImm(0) // glc
          .addImm(0) // slc
          .addImm(0); // dlc


  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}
805
Matt Arsenaultdbb6c032019-06-24 18:02:18 +0000806static int sizeToSubRegIndex(unsigned Size) {
807 switch (Size) {
808 case 32:
809 return AMDGPU::sub0;
810 case 64:
811 return AMDGPU::sub0_sub1;
812 case 96:
813 return AMDGPU::sub0_sub1_sub2;
814 case 128:
815 return AMDGPU::sub0_sub1_sub2_sub3;
816 case 256:
817 return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
818 default:
819 if (Size < 32)
820 return AMDGPU::sub0;
821 if (Size > 256)
822 return -1;
823 return sizeToSubRegIndex(PowerOf2Ceil(Size));
824 }
825}
826
/// Select G_TRUNC by rewriting it into a COPY, reading a subregister of the
/// source when the source is wider than 32 bits. Both operands must be on
/// the same register bank; cross-bank truncates are rejected.
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  // A truncate across banks would need a real instruction, not a copy.
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    // Read only the low subregister covering the destination size.
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  // Within a bank, truncation is just a (sub)register copy.
  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}
875
Matt Arsenault5dafcb92019-07-01 13:22:06 +0000876/// \returns true if a bitmask for \p Size bits will be an inline immediate.
877static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
878 Mask = maskTrailingOnes<unsigned>(Size);
879 int SignedMask = static_cast<int>(Mask);
880 return SignedMask >= -16 && SignedMask <= 64;
881}
882
/// Select G_SEXT / G_ZEXT / G_ANYEXT.
///
/// The lowering depends on the source's register bank:
///  - SCC bank (s1 only): copy the value into SCC, then S_CSELECT 0 / (-1|1).
///  - VCC bank (s1 only): V_CNDMASK_B32 selecting 0 / (-1|1) per lane.
///  - VGPR bank: V_AND with an inline-immediate mask when profitable,
///    otherwise V_BFE_I32/U32.
///  - SGPR bank: S_SEXT for 8/16-bit signed widening, S_AND when the mask is
///    an inline immediate, otherwise S_BFE (with a REG_SEQUENCE padding the
///    source to 64 bits when widening past 32).
/// On success the new instructions are emitted but \p I is NOT erased; the
/// caller (select) erases it.
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    // Per-lane select of 0 vs. -1 (sext) or 1 (zext/anyext).
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  // Any-extension of a non-condition value leaves the high bits undefined,
  // so a plain copy suffices.
  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0) // Offset
      .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    // Dedicated scalar sign-extension opcodes for byte/half sources.
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}
1017
/// Select G_CONSTANT / G_FCONSTANT into S_MOV_B32 or V_MOV_B32 (bank chosen
/// from the destination's register bank or class). 64-bit constants are
/// materialized as two 32-bit moves combined with a REG_SEQUENCE; other
/// sizes are rejected.
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  // Prefer the register bank if RegBankSelect assigned one; otherwise fall
  // back to the already-assigned register class.
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    // 32-bit: mutate the G_CONSTANT in place into the move.
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  // 64-bit: materialize the low and high halves separately, then combine.
  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}
1084
1085static bool isConstant(const MachineInstr &MI) {
1086 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
1087}
1088
/// Walk the address operand of \p Load (operand 1) through any chain of
/// G_GEPs, appending one GEPInfo per G_GEP to \p AddrInfo (the GEP nearest
/// the load first). Each GEPInfo records the constant offset contributed by
/// G_CONSTANT operands and partitions the remaining register operands into
/// SGPR and VGPR parts by register bank.
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  // Operands 1 and 2 of a G_GEP are the base pointer and the offset.
  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  // Recurse in case the base pointer is itself a G_GEP.
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}
1122
/// Return true if the memory access in \p MI is known to be uniform, judging
/// from its single memory operand (pointer value kind, address space, or
/// "amdgpu.uniform" metadata). Instructions without exactly one memory
/// operand are conservatively treated as non-uniform.
bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  // Otherwise rely on uniformity metadata attached by earlier analyses.
  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}
1144
Tom Stellardca166212017-01-30 21:56:46 +00001145bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
1146 for (const GEPInfo &GEPInfo : AddrInfo) {
1147 if (!GEPInfo.VgprParts.empty())
1148 return true;
1149 }
1150 return false;
1151}
1152
/// Fallback manual selection of G_LOAD into a FLAT load (32- or 64-bit
/// results only). 32-bit pointers are rejected, as FLAT addressing needs a
/// 64-bit pointer. Used only after the TableGen-erated patterns fail (see
/// the G_LOAD case in select()).
bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();
  Register DstReg = I.getOperand(0).getReg();
  Register PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  // Reject 32-bit pointers; only 64-bit (flat) addressing is handled here.
  if (MRI.getType(I.getOperand(1).getReg()).getSizeInBits() == 32) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  switch (LoadSize) {
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Unhandled load size\n");
    return false;
  }

  // FLAT loads take (addr, offset, glc, slc, dlc).
  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                  .add(I.getOperand(0))
                  .addReg(PtrReg)
                  .addImm(0) // offset
                  .addImm(0) // glc
                  .addImm(0) // slc
                  .addImm(0); // dlc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
1196
/// Select G_BRCOND into a copy of the condition into the appropriate
/// physical condition register (SCC or VCC) followed by S_CBRANCH_SCC1 or
/// S_CBRANCH_VCCNZ. Conditions on any other bank are rejected.
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know that a VCC producer based on the register bank, that ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  // Give the condition vreg a concrete class if none was assigned yet.
  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}
1241
Matt Arsenaultcda82f02019-07-01 15:48:18 +00001242bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
1243 MachineBasicBlock *BB = I.getParent();
1244 MachineFunction *MF = BB->getParent();
1245 MachineRegisterInfo &MRI = MF->getRegInfo();
1246
1247 Register DstReg = I.getOperand(0).getReg();
1248 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
1249 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
1250 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
1251 if (IsVGPR)
1252 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
1253
1254 return RBI.constrainGenericRegister(
1255 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
1256}
1257
/// Top-level GlobalISel selection entry point: dispatch \p I to the manual
/// per-opcode selectors, falling back to the TableGen-erated selectImpl for
/// everything else (and for some opcodes when the manual path fails).
bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {
  if (I.isPHI())
    return selectPHI(I);

  // Non-generic (already target-specific) instructions only need copies
  // handled; everything else is considered selected.
  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    if (selectG_ADD_SUB(I))
      return true;
    LLVM_FALLTHROUGH;
  default:
    // Anything not handled manually goes to the TableGen-erated selector.
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_LOAD:
    // Try the imported patterns first; fall back to the manual FLAT path.
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    // selectG_SZA_EXT emits replacement code but leaves the original
    // instruction in place; erase it here on success.
    if (selectG_SZA_EXT(I)) {
      I.eraseFromParent();
      return true;
    }

    return false;
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  case TargetOpcode::G_FENCE:
    // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
    // is checking for G_CONSTANT
    I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
    return true;
  }
  return false;
}
Tom Stellard1dc90202018-05-10 20:53:06 +00001336
Tom Stellard26fac0f2018-06-22 02:54:57 +00001337InstructionSelector::ComplexRendererFns
1338AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
1339 return {{
1340 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1341 }};
1342
1343}
1344
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001345std::pair<Register, unsigned>
1346AMDGPUInstructionSelector::selectVOP3ModsImpl(
1347 Register Src, const MachineRegisterInfo &MRI) const {
1348 unsigned Mods = 0;
1349 MachineInstr *MI = MRI.getVRegDef(Src);
1350
1351 if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
1352 Src = MI->getOperand(1).getReg();
1353 Mods |= SISrcMods::NEG;
1354 MI = MRI.getVRegDef(Src);
1355 }
1356
1357 if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
1358 Src = MI->getOperand(1).getReg();
1359 Mods |= SISrcMods::ABS;
1360 }
1361
1362 return std::make_pair(Src, Mods);
1363}
1364
Tom Stellard1dc90202018-05-10 20:53:06 +00001365///
1366/// This will select either an SGPR or VGPR operand and will save us from
1367/// having to write an extra tablegen pattern.
1368InstructionSelector::ComplexRendererFns
1369AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
1370 return {{
1371 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1372 }};
1373}
Tom Stellarddcc95e92018-05-11 05:44:16 +00001374
1375InstructionSelector::ComplexRendererFns
1376AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001377 MachineRegisterInfo &MRI
1378 = Root.getParent()->getParent()->getParent()->getRegInfo();
1379
1380 Register Src;
1381 unsigned Mods;
1382 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1383
Tom Stellarddcc95e92018-05-11 05:44:16 +00001384 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001385 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1386 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
1387 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1388 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
Tom Stellarddcc95e92018-05-11 05:44:16 +00001389 }};
1390}
Tom Stellard9a653572018-06-22 02:34:29 +00001391InstructionSelector::ComplexRendererFns
1392AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
1393 return {{
1394 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
1395 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1396 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
1397 }};
1398}
Tom Stellard46bbbc32018-06-13 22:30:47 +00001399
1400InstructionSelector::ComplexRendererFns
1401AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001402 MachineRegisterInfo &MRI
1403 = Root.getParent()->getParent()->getParent()->getRegInfo();
1404
1405 Register Src;
1406 unsigned Mods;
1407 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1408
Tom Stellard46bbbc32018-06-13 22:30:47 +00001409 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001410 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1411 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
Tom Stellard46bbbc32018-06-13 22:30:47 +00001412 }};
1413}
Tom Stellard79b5c382019-02-20 21:02:37 +00001414
/// Complex pattern: match an SMRD address of the form (single SGPR base +
/// constant offset) where the offset is legal for the target's encoded SMRD
/// immediate. Renders the base register and encoded offset.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
    Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  // Only a simple (one SGPR base + imm) innermost GEP can be folded.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}
1438
/// Complex pattern: like selectSmrdImm, but for the SMRD variants that take
/// a full 32-bit literal offset — the encoded offset only needs to fit in an
/// unsigned 32-bit immediate.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
    Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  // Only a simple (one SGPR base + imm) innermost GEP can be folded.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}
1461
/// Complex pattern: match an SMRD address as (SGPR base + SGPR offset) by
/// materializing the GEP's 32-bit constant offset into a fresh SGPR with
/// S_MOV_B32. Tried only after the _IMM forms fail.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with an 32-bit immediate offset.
  // It is OK to select this using a sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}