//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

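// Check whether Reg holds the scalar condition code: either the physical SCC
// register itself, a 1-bit virtual register in a 32-bit scalar class, or a
// virtual register assigned to the SCC register bank.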
static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

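// Check whether Reg holds a vector condition (lane mask): either the physical
// VCC register, a 1-bit virtual register in a wave-mask class, or a virtual
// register assigned to the VCC register bank.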
bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

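// Select a plain COPY. The interesting case is a copy into a VCC-bank
// destination: unless the source is already SCC, a V_CMP_NE_U32 is emitted to
// materialize a full lane mask from the 1-bit source.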
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }

    // TODO: Should probably leave the copy and let copyPhysReg expand it.
    if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
      return false;

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
      .addImm(0)
      .addReg(SrcReg);

    if (!MRI.getRegClassOrNull(SrcReg))
      MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
    I.eraseFromParent();
    return true;
  }

  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

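// Select G_PHI by constraining the result to a concrete register class derived
// from its type and register bank. SCC phis are rejected as illegal.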
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {
  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
      .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

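// Select G_ADD/G_SUB. 32-bit operations map directly to a scalar or vector
// add/sub; 64-bit adds are decomposed into sub0/sub1 halves chained through a
// carry register, then recombined with a REG_SEQUENCE.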
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                         .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

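// Select G_MERGE_VALUES (and vector build/concat) as a single REG_SEQUENCE
// that places each source into the matching subregister of the destination.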
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();
  if (SrcSize < 32)
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

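// Select G_UNMERGE_VALUES as one subregister copy per destination.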
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg0);
  LLT SrcTy = MRI.getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                        .addDef(I.getOperand(0).getReg())
                        .addReg(I.getOperand(1).getReg())
                        .addReg(I.getOperand(2).getReg())
                        .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(
    MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();
    MachineFunction *MF = BB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
      if (!MRI.getRegClassOrNull(Reg))
        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    }

    return true;
  }
  default:
    return selectImpl(I, CoverageInfo);
  }
}

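// Map an integer predicate to the corresponding 32- or 64-bit VALU compare
// opcode, or -1 if the size is unsupported.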
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                           .add(I.getOperand(2))
                           .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
                         .add(I.getOperand(2))
                         .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    if (!MRI.getRegClassOrNull(Reg))
      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    return true;
  }
  default:
    return selectImpl(I, CoverageInfo);
  }
}

bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class we use to
    // represent it. Set the register class manually here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .add(I.getOperand(3))
        .addImm(0)
        .add(I.getOperand(2))
        .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
  if (PtrSize != 64) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

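// Map a bit width to the subregister index spanning it. Widths below 32 round
// up to sub0, non-power-of-2 widths round up to the next power of 2, and
// anything over 256 bits is unsupported (-1).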
static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

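// Select G_SEXT/G_ZEXT/G_ANYEXT. The strategy depends on the source bank:
// boolean SCC/VCC sources become conditional selects, VGPR sources use a
// bitfield extract (or an AND mask when cheaper), and SGPR sources use scalar
// BFE or sign-extension instructions.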
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)               // src0_modifiers
        .addImm(0)               // src0
        .addImm(0)               // src1_modifiers
        .addImm(Signed ? -1 : 1) // src1
        .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
          .addImm(Mask)
          .addReg(SrcReg);
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
        .addReg(SrcReg)
        .addImm(0)        // Offset
        .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
    .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
    .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(HiReg)
      .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

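// Walk the G_GEP chain feeding a load/store address, recording for each GEP
// its constant immediate part and its SGPR/VGPR register parts; recurses
// through chained GEPs.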
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts? Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();
  Register DstReg = I.getOperand(0).getReg();
  Register PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  if (MRI.getType(PtrReg).getSizeInBits() == 32) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  switch (LoadSize) {
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Unhandled load size\n");
    return false;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(0))
          .addReg(PtrReg)
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know, based on the register bank, that a VCC producer ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
    DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
}

bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {
  if (I.isPHI())
    return selectPHI(I);

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    if (selectG_ADD_SUB(I))
      return true;
    LLVM_FALLTHROUGH;
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_LOAD:
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    if (selectG_SZA_EXT(I)) {
      I.eraseFromParent();
      return true;
    }

    return false;
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  case TargetOpcode::G_FENCE:
    // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
    // is checking for G_CONSTANT
    I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
    return true;
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

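// Peel G_FNEG/G_FABS off a VOP3 source and fold them into source modifier
// flags, returning the underlying register plus the accumulated mods.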
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(
    Register Src, const MachineRegisterInfo &MRI) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI.getVRegDef(Src);

  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = MRI.getVRegDef(Src);
  }

  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  return std::make_pair(Src, Mods);
}

/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

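// Complex pattern matcher for SMRD loads with a legal immediate offset;
// renders the base SGPR and the encoded offset.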
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using a sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}