blob: 317a9b5c08ccb70b71f49ffd8d93306826b05781 [file] [log] [blame]
Tom Stellardca166212017-01-30 21:56:46 +00001//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tom Stellardca166212017-01-30 21:56:46 +00006//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AMDGPU.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUInstructionSelector.h"
15#include "AMDGPUInstrInfo.h"
16#include "AMDGPURegisterBankInfo.h"
17#include "AMDGPURegisterInfo.h"
18#include "AMDGPUSubtarget.h"
Tom Stellard1dc90202018-05-10 20:53:06 +000019#include "AMDGPUTargetMachine.h"
Matt Arsenaultb1cc4f52018-06-25 16:17:48 +000020#include "SIMachineFunctionInfo.h"
Tom Stellard44b30b42018-05-22 02:03:23 +000021#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Tom Stellard1dc90202018-05-10 20:53:06 +000022#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
23#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
Aditya Nandakumar18b3f9d2018-01-17 19:31:33 +000024#include "llvm/CodeGen/GlobalISel/Utils.h"
Tom Stellardca166212017-01-30 21:56:46 +000025#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineFunction.h"
27#include "llvm/CodeGen/MachineInstr.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/IR/Type.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/raw_ostream.h"
33
34#define DEBUG_TYPE "amdgpu-isel"
35
36using namespace llvm;
37
Tom Stellard1dc90202018-05-10 20:53:06 +000038#define GET_GLOBALISEL_IMPL
Tom Stellard5bfbae52018-07-11 20:59:01 +000039#define AMDGPUSubtarget GCNSubtarget
Tom Stellard1dc90202018-05-10 20:53:06 +000040#include "AMDGPUGenGlobalISel.inc"
41#undef GET_GLOBALISEL_IMPL
Tom Stellard5bfbae52018-07-11 20:59:01 +000042#undef AMDGPUSubtarget
Tom Stellard1dc90202018-05-10 20:53:06 +000043
Tom Stellardca166212017-01-30 21:56:46 +000044AMDGPUInstructionSelector::AMDGPUInstructionSelector(
Tom Stellard5bfbae52018-07-11 20:59:01 +000045 const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
Tom Stellard1dc90202018-05-10 20:53:06 +000046 const AMDGPUTargetMachine &TM)
Tom Stellardca166212017-01-30 21:56:46 +000047 : InstructionSelector(), TII(*STI.getInstrInfo()),
Tom Stellard1dc90202018-05-10 20:53:06 +000048 TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
49 STI(STI),
50 EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
51#define GET_GLOBALISEL_PREDICATES_INIT
52#include "AMDGPUGenGlobalISel.inc"
53#undef GET_GLOBALISEL_PREDICATES_INIT
54#define GET_GLOBALISEL_TEMPORARIES_INIT
55#include "AMDGPUGenGlobalISel.inc"
56#undef GET_GLOBALISEL_TEMPORARIES_INIT
Tom Stellard1dc90202018-05-10 20:53:06 +000057{
58}
59
// Name reported for this selector; reuses the file's DEBUG_TYPE string.
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
Tom Stellardca166212017-01-30 21:56:46 +000061
Matt Arsenault2ab25f92019-07-01 16:06:02 +000062static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
63 if (TargetRegisterInfo::isPhysicalRegister(Reg))
64 return Reg == AMDGPU::SCC;
Tom Stellard8b1c53b2019-06-17 16:27:43 +000065
66 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
67 const TargetRegisterClass *RC =
68 RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
Matt Arsenault1daad912019-07-01 15:23:04 +000069 if (RC) {
70 if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
71 return false;
72 const LLT Ty = MRI.getType(Reg);
73 return Ty.isValid() && Ty.getSizeInBits() == 1;
74 }
Tom Stellard8b1c53b2019-06-17 16:27:43 +000075
76 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
77 return RB->getID() == AMDGPU::SCCRegBankID;
78}
79
Matt Arsenault2ab25f92019-07-01 16:06:02 +000080bool AMDGPUInstructionSelector::isVCC(Register Reg,
81 const MachineRegisterInfo &MRI) const {
82 if (TargetRegisterInfo::isPhysicalRegister(Reg))
83 return Reg == TRI.getVCC();
Matt Arsenault9f992c22019-07-01 13:22:07 +000084
85 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
86 const TargetRegisterClass *RC =
87 RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
88 if (RC) {
Matt Arsenault2ab25f92019-07-01 16:06:02 +000089 return RC->hasSuperClassEq(TRI.getBoolRC()) &&
Matt Arsenault9f992c22019-07-01 13:22:07 +000090 MRI.getType(Reg).getSizeInBits() == 1;
91 }
92
93 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
94 return RB->getID() == AMDGPU::VCCRegBankID;
95}
96
Tom Stellard1e0edad2018-05-10 21:20:10 +000097bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
98 MachineBasicBlock *BB = I.getParent();
99 MachineFunction *MF = BB->getParent();
100 MachineRegisterInfo &MRI = MF->getRegInfo();
101 I.setDesc(TII.get(TargetOpcode::COPY));
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000102
103 // Special case for COPY from the scc register bank. The scc register bank
104 // is modeled using 32-bit sgprs.
105 const MachineOperand &Src = I.getOperand(1);
106 unsigned SrcReg = Src.getReg();
107 if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
Matt Arsenault9f992c22019-07-01 13:22:07 +0000108 unsigned DstReg = I.getOperand(0).getReg();
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000109
Matt Arsenault9f992c22019-07-01 13:22:07 +0000110 // Specially handle scc->vcc copies.
Matt Arsenault2ab25f92019-07-01 16:06:02 +0000111 if (isVCC(DstReg, MRI)) {
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000112 const DebugLoc &DL = I.getDebugLoc();
Matt Arsenault9f992c22019-07-01 13:22:07 +0000113 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000114 .addImm(0)
115 .addReg(SrcReg);
116 if (!MRI.getRegClassOrNull(SrcReg))
117 MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
118 I.eraseFromParent();
119 return true;
120 }
121 }
122
Tom Stellard1e0edad2018-05-10 21:20:10 +0000123 for (const MachineOperand &MO : I.operands()) {
124 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
125 continue;
126
127 const TargetRegisterClass *RC =
128 TRI.getConstrainedRegClassForOperand(MO, MRI);
129 if (!RC)
130 continue;
131 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
132 }
133 return true;
134}
135
Matt Arsenaulte1006252019-07-01 16:32:47 +0000136bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
137 MachineBasicBlock *BB = I.getParent();
138 MachineFunction *MF = BB->getParent();
139 MachineRegisterInfo &MRI = MF->getRegInfo();
140
141 const Register DefReg = I.getOperand(0).getReg();
142 const LLT DefTy = MRI.getType(DefReg);
143
144 // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)
145
146 const RegClassOrRegBank &RegClassOrBank =
147 MRI.getRegClassOrRegBank(DefReg);
148
149 const TargetRegisterClass *DefRC
150 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
151 if (!DefRC) {
152 if (!DefTy.isValid()) {
153 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
154 return false;
155 }
156
157 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
158 if (RB.getID() == AMDGPU::SCCRegBankID) {
159 LLVM_DEBUG(dbgs() << "illegal scc phi\n");
160 return false;
161 }
162
163 DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
164 if (!DefRC) {
165 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
166 return false;
167 }
168 }
169
170 I.setDesc(TII.get(TargetOpcode::PHI));
171 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
172}
173
Tom Stellardca166212017-01-30 21:56:46 +0000174MachineOperand
175AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000176 const TargetRegisterClass &SubRC,
Tom Stellardca166212017-01-30 21:56:46 +0000177 unsigned SubIdx) const {
178
179 MachineInstr *MI = MO.getParent();
180 MachineBasicBlock *BB = MO.getParent()->getParent();
181 MachineFunction *MF = BB->getParent();
182 MachineRegisterInfo &MRI = MF->getRegInfo();
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000183 Register DstReg = MRI.createVirtualRegister(&SubRC);
Tom Stellardca166212017-01-30 21:56:46 +0000184
185 if (MO.isReg()) {
186 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
187 unsigned Reg = MO.getReg();
188 BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
189 .addReg(Reg, 0, ComposedSubIdx);
190
191 return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
192 MO.isKill(), MO.isDead(), MO.isUndef(),
193 MO.isEarlyClobber(), 0, MO.isDebug(),
194 MO.isInternalRead());
195 }
196
197 assert(MO.isImm());
198
199 APInt Imm(64, MO.getImm());
200
201 switch (SubIdx) {
202 default:
203 llvm_unreachable("do not know to split immediate with this sub index.");
204 case AMDGPU::sub0:
205 return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
206 case AMDGPU::sub1:
207 return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
208 }
209}
210
Tom Stellard390a5f42018-07-13 21:05:14 +0000211static int64_t getConstant(const MachineInstr *MI) {
212 return MI->getOperand(1).getCImm()->getSExtValue();
213}
214
Matt Arsenaulte6d10f92019-07-09 14:05:11 +0000215bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
Tom Stellardca166212017-01-30 21:56:46 +0000216 MachineBasicBlock *BB = I.getParent();
217 MachineFunction *MF = BB->getParent();
218 MachineRegisterInfo &MRI = MF->getRegInfo();
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000219 Register DstReg = I.getOperand(0).getReg();
220 const DebugLoc &DL = I.getDebugLoc();
221 unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
222 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
223 const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
Matt Arsenaulte6d10f92019-07-09 14:05:11 +0000224 const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
Tom Stellardca166212017-01-30 21:56:46 +0000225
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000226 if (Size == 32) {
227 if (IsSALU) {
Matt Arsenaulte6d10f92019-07-09 14:05:11 +0000228 const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000229 MachineInstr *Add =
Matt Arsenaulte6d10f92019-07-09 14:05:11 +0000230 BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000231 .add(I.getOperand(1))
232 .add(I.getOperand(2));
233 I.eraseFromParent();
234 return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
235 }
Tom Stellardca166212017-01-30 21:56:46 +0000236
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000237 if (STI.hasAddNoCarry()) {
Matt Arsenaulte6d10f92019-07-09 14:05:11 +0000238 const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
239 I.setDesc(TII.get(Opc));
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000240 I.addOperand(*MF, MachineOperand::CreateImm(0));
241 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
242 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
243 }
Tom Stellardca166212017-01-30 21:56:46 +0000244
Matt Arsenaulte6d10f92019-07-09 14:05:11 +0000245 const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;
246
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000247 Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
248 MachineInstr *Add
Matt Arsenaulte6d10f92019-07-09 14:05:11 +0000249 = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000250 .addDef(UnusedCarry, RegState::Dead)
251 .add(I.getOperand(1))
252 .add(I.getOperand(2))
253 .addImm(0);
254 I.eraseFromParent();
255 return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
Tom Stellardca166212017-01-30 21:56:46 +0000256 }
257
Matt Arsenaulte6d10f92019-07-09 14:05:11 +0000258 assert(!Sub && "illegal sub should not reach here");
259
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000260 const TargetRegisterClass &RC
261 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
262 const TargetRegisterClass &HalfRC
263 = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
264
265 MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
266 MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
267 MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
268 MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
269
270 Register DstLo = MRI.createVirtualRegister(&HalfRC);
271 Register DstHi = MRI.createVirtualRegister(&HalfRC);
272
273 if (IsSALU) {
274 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
275 .add(Lo1)
276 .add(Lo2);
277 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
278 .add(Hi1)
279 .add(Hi2);
280 } else {
281 const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
282 Register CarryReg = MRI.createVirtualRegister(CarryRC);
283 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
284 .addDef(CarryReg)
285 .add(Lo1)
286 .add(Lo2)
287 .addImm(0);
Matt Arsenault70a4d3f2019-07-02 14:40:22 +0000288 MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000289 .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
290 .add(Hi1)
291 .add(Hi2)
292 .addReg(CarryReg, RegState::Kill)
293 .addImm(0);
Matt Arsenault70a4d3f2019-07-02 14:40:22 +0000294
295 if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
296 return false;
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000297 }
298
299 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
300 .addReg(DstLo)
301 .addImm(AMDGPU::sub0)
302 .addReg(DstHi)
303 .addImm(AMDGPU::sub1);
304
Matt Arsenault70a4d3f2019-07-02 14:40:22 +0000305
306 if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000307 return false;
308
Tom Stellardca166212017-01-30 21:56:46 +0000309 I.eraseFromParent();
310 return true;
311}
312
Tom Stellard41f32192019-02-28 23:37:48 +0000313bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
314 MachineBasicBlock *BB = I.getParent();
315 MachineFunction *MF = BB->getParent();
316 MachineRegisterInfo &MRI = MF->getRegInfo();
317 assert(I.getOperand(2).getImm() % 32 == 0);
318 unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
319 const DebugLoc &DL = I.getDebugLoc();
320 MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
321 I.getOperand(0).getReg())
322 .addReg(I.getOperand(1).getReg(), 0, SubReg);
323
324 for (const MachineOperand &MO : Copy->operands()) {
325 const TargetRegisterClass *RC =
326 TRI.getConstrainedRegClassForOperand(MO, MRI);
327 if (!RC)
328 continue;
329 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
330 }
331 I.eraseFromParent();
332 return true;
333}
334
Matt Arsenault9b7ffc42019-07-09 14:02:20 +0000335bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
336 MachineBasicBlock *BB = MI.getParent();
337 MachineFunction *MF = BB->getParent();
338 MachineRegisterInfo &MRI = MF->getRegInfo();
339 Register DstReg = MI.getOperand(0).getReg();
340 LLT DstTy = MRI.getType(DstReg);
341 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
342
343 const unsigned SrcSize = SrcTy.getSizeInBits();
Matt Arsenaulta65913e2019-07-15 17:26:43 +0000344 if (SrcSize < 32)
345 return false;
346
Matt Arsenault9b7ffc42019-07-09 14:02:20 +0000347 const DebugLoc &DL = MI.getDebugLoc();
348 const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
349 const unsigned DstSize = DstTy.getSizeInBits();
350 const TargetRegisterClass *DstRC =
351 TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
352 if (!DstRC)
353 return false;
354
355 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
356 MachineInstrBuilder MIB =
357 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
358 for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
359 MachineOperand &Src = MI.getOperand(I + 1);
360 MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
361 MIB.addImm(SubRegs[I]);
362
363 const TargetRegisterClass *SrcRC
364 = TRI.getConstrainedRegClassForOperand(Src, MRI);
365 if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
366 return false;
367 }
368
369 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
370 return false;
371
372 MI.eraseFromParent();
373 return true;
374}
375
Matt Arsenault872f38b2019-07-09 14:02:26 +0000376bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
377 MachineBasicBlock *BB = MI.getParent();
378 MachineFunction *MF = BB->getParent();
379 MachineRegisterInfo &MRI = MF->getRegInfo();
380 const int NumDst = MI.getNumOperands() - 1;
381
382 MachineOperand &Src = MI.getOperand(NumDst);
383
384 Register SrcReg = Src.getReg();
385 Register DstReg0 = MI.getOperand(0).getReg();
386 LLT DstTy = MRI.getType(DstReg0);
387 LLT SrcTy = MRI.getType(SrcReg);
388
389 const unsigned DstSize = DstTy.getSizeInBits();
390 const unsigned SrcSize = SrcTy.getSizeInBits();
391 const DebugLoc &DL = MI.getDebugLoc();
392 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);
393
394 const TargetRegisterClass *SrcRC =
395 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
396 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
397 return false;
398
399 const unsigned SrcFlags = getUndefRegState(Src.isUndef());
400
401 // Note we could have mixed SGPR and VGPR destination banks for an SGPR
402 // source, and this relies on the fact that the same subregister indices are
403 // used for both.
404 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
405 for (int I = 0, E = NumDst; I != E; ++I) {
406 MachineOperand &Dst = MI.getOperand(I);
407 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
408 .addReg(SrcReg, SrcFlags, SubRegs[I]);
409
410 const TargetRegisterClass *DstRC =
411 TRI.getConstrainedRegClassForOperand(Dst, MRI);
412 if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
413 return false;
414 }
415
416 MI.eraseFromParent();
417 return true;
418}
419
Tom Stellardca166212017-01-30 21:56:46 +0000420bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
Matt Arsenaulte6d10f92019-07-09 14:05:11 +0000421 return selectG_ADD_SUB(I);
Tom Stellardca166212017-01-30 21:56:46 +0000422}
423
Tom Stellard3f1c6fe2018-06-21 23:38:20 +0000424bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
425 MachineBasicBlock *BB = I.getParent();
426 MachineFunction *MF = BB->getParent();
427 MachineRegisterInfo &MRI = MF->getRegInfo();
428 const MachineOperand &MO = I.getOperand(0);
Matt Arsenaultf8a841b2019-06-24 16:24:03 +0000429
430 // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
431 // regbank check here is to know why getConstrainedRegClassForOperand failed.
432 const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
433 if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
434 (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
435 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
436 return true;
437 }
438
439 return false;
Tom Stellard3f1c6fe2018-06-21 23:38:20 +0000440}
441
Tom Stellard33634d1b2019-03-01 00:50:26 +0000442bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
443 MachineBasicBlock *BB = I.getParent();
444 MachineFunction *MF = BB->getParent();
445 MachineRegisterInfo &MRI = MF->getRegInfo();
446 unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
447 DebugLoc DL = I.getDebugLoc();
448 MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
449 .addDef(I.getOperand(0).getReg())
450 .addReg(I.getOperand(1).getReg())
451 .addReg(I.getOperand(2).getReg())
452 .addImm(SubReg);
453
454 for (const MachineOperand &MO : Ins->operands()) {
455 if (!MO.isReg())
456 continue;
457 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
458 continue;
459
460 const TargetRegisterClass *RC =
461 TRI.getConstrainedRegClassForOperand(MO, MRI);
462 if (!RC)
463 continue;
464 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
465 }
466 I.eraseFromParent();
467 return true;
468}
469
Matt Arsenault50be3482019-07-02 14:52:16 +0000470bool AMDGPUInstructionSelector::selectG_INTRINSIC(
471 MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
Matt Arsenaultfee19492019-06-17 17:01:27 +0000472 unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
Tom Stellarda9284732018-06-14 19:26:37 +0000473 switch (IntrinsicID) {
Tom Stellardac684712018-07-13 22:16:03 +0000474 case Intrinsic::maxnum:
475 case Intrinsic::minnum:
Tom Stellarda9284732018-06-14 19:26:37 +0000476 case Intrinsic::amdgcn_cvt_pkrtz:
477 return selectImpl(I, CoverageInfo);
Matt Arsenault53fa7592019-07-15 18:25:24 +0000478 case Intrinsic::amdgcn_if_break: {
479 MachineBasicBlock *BB = I.getParent();
480 MachineFunction *MF = BB->getParent();
481 MachineRegisterInfo &MRI = MF->getRegInfo();
482
483 // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
484 // SelectionDAG uses for wave32 vs wave64.
485 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
486 .add(I.getOperand(0))
487 .add(I.getOperand(2))
488 .add(I.getOperand(3));
489
490 Register DstReg = I.getOperand(0).getReg();
491 Register Src0Reg = I.getOperand(2).getReg();
492 Register Src1Reg = I.getOperand(3).getReg();
493
494 I.eraseFromParent();
495
496 for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
497 if (!MRI.getRegClassOrNull(Reg))
498 MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
499 }
500
501 return true;
502 }
Matt Arsenault50be3482019-07-02 14:52:16 +0000503 default:
504 return selectImpl(I, CoverageInfo);
Tom Stellarda9284732018-06-14 19:26:37 +0000505 }
Tom Stellarda9284732018-06-14 19:26:37 +0000506}
507
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000508static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
509 if (Size != 32 && Size != 64)
510 return -1;
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000511 switch (P) {
512 default:
513 llvm_unreachable("Unknown condition code!");
514 case CmpInst::ICMP_NE:
515 return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
516 case CmpInst::ICMP_EQ:
517 return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
518 case CmpInst::ICMP_SGT:
519 return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
520 case CmpInst::ICMP_SGE:
521 return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
522 case CmpInst::ICMP_SLT:
523 return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
524 case CmpInst::ICMP_SLE:
525 return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
526 case CmpInst::ICMP_UGT:
527 return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
528 case CmpInst::ICMP_UGE:
529 return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
530 case CmpInst::ICMP_ULT:
531 return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
532 case CmpInst::ICMP_ULE:
533 return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
534 }
535}
536
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000537int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
538 unsigned Size) const {
539 if (Size == 64) {
540 if (!STI.hasScalarCompareEq64())
541 return -1;
542
543 switch (P) {
544 case CmpInst::ICMP_NE:
545 return AMDGPU::S_CMP_LG_U64;
546 case CmpInst::ICMP_EQ:
547 return AMDGPU::S_CMP_EQ_U64;
548 default:
549 return -1;
550 }
551 }
552
553 if (Size != 32)
554 return -1;
555
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000556 switch (P) {
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000557 case CmpInst::ICMP_NE:
558 return AMDGPU::S_CMP_LG_U32;
559 case CmpInst::ICMP_EQ:
560 return AMDGPU::S_CMP_EQ_U32;
561 case CmpInst::ICMP_SGT:
562 return AMDGPU::S_CMP_GT_I32;
563 case CmpInst::ICMP_SGE:
564 return AMDGPU::S_CMP_GE_I32;
565 case CmpInst::ICMP_SLT:
566 return AMDGPU::S_CMP_LT_I32;
567 case CmpInst::ICMP_SLE:
568 return AMDGPU::S_CMP_LE_I32;
569 case CmpInst::ICMP_UGT:
570 return AMDGPU::S_CMP_GT_U32;
571 case CmpInst::ICMP_UGE:
572 return AMDGPU::S_CMP_GE_U32;
573 case CmpInst::ICMP_ULT:
574 return AMDGPU::S_CMP_LT_U32;
575 case CmpInst::ICMP_ULE:
576 return AMDGPU::S_CMP_LE_U32;
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000577 default:
578 llvm_unreachable("Unknown condition code!");
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000579 }
580}
581
582bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
583 MachineBasicBlock *BB = I.getParent();
584 MachineFunction *MF = BB->getParent();
585 MachineRegisterInfo &MRI = MF->getRegInfo();
586 DebugLoc DL = I.getDebugLoc();
587
588 unsigned SrcReg = I.getOperand(2).getReg();
589 unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000590
591 auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000592
593 unsigned CCReg = I.getOperand(0).getReg();
594 if (isSCC(CCReg, MRI)) {
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000595 int Opcode = getS_CMPOpcode(Pred, Size);
596 if (Opcode == -1)
597 return false;
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000598 MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
599 .add(I.getOperand(2))
600 .add(I.getOperand(3));
Matt Arsenaultd7ffa2a2019-06-25 13:18:11 +0000601 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
602 .addReg(AMDGPU::SCC);
603 bool Ret =
604 constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
605 RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000606 I.eraseFromParent();
607 return Ret;
608 }
609
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000610 int Opcode = getV_CMPOpcode(Pred, Size);
611 if (Opcode == -1)
612 return false;
613
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000614 MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
615 I.getOperand(0).getReg())
616 .add(I.getOperand(2))
617 .add(I.getOperand(3));
618 RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
619 AMDGPU::SReg_64RegClass, MRI);
620 bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
621 I.eraseFromParent();
622 return Ret;
623}
624
Tom Stellard390a5f42018-07-13 21:05:14 +0000625static MachineInstr *
626buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
627 unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
628 unsigned VM, bool Compr, unsigned Enabled, bool Done) {
629 const DebugLoc &DL = Insert->getDebugLoc();
630 MachineBasicBlock &BB = *Insert->getParent();
631 unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
632 return BuildMI(BB, Insert, DL, TII.get(Opcode))
633 .addImm(Tgt)
634 .addReg(Reg0)
635 .addReg(Reg1)
636 .addReg(Reg2)
637 .addReg(Reg3)
638 .addImm(VM)
639 .addImm(Compr)
640 .addImm(Enabled);
641}
642
643bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
Matt Arsenault50be3482019-07-02 14:52:16 +0000644 MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
Tom Stellard390a5f42018-07-13 21:05:14 +0000645 MachineBasicBlock *BB = I.getParent();
646 MachineFunction *MF = BB->getParent();
647 MachineRegisterInfo &MRI = MF->getRegInfo();
648
649 unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
650 switch (IntrinsicID) {
651 case Intrinsic::amdgcn_exp: {
652 int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
653 int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
654 int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
655 int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));
656
657 MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
658 I.getOperand(4).getReg(),
659 I.getOperand(5).getReg(),
660 I.getOperand(6).getReg(),
661 VM, false, Enabled, Done);
662
663 I.eraseFromParent();
664 return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
665 }
666 case Intrinsic::amdgcn_exp_compr: {
667 const DebugLoc &DL = I.getDebugLoc();
668 int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
669 int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
670 unsigned Reg0 = I.getOperand(3).getReg();
671 unsigned Reg1 = I.getOperand(4).getReg();
672 unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
673 int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
674 int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));
675
676 BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
677 MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
678 true, Enabled, Done);
679
680 I.eraseFromParent();
681 return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
682 }
Matt Arsenaultb3901212019-07-15 18:18:46 +0000683 case Intrinsic::amdgcn_end_cf: {
684 // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
685 // SelectionDAG uses for wave32 vs wave64.
686 BuildMI(*BB, &I, I.getDebugLoc(),
687 TII.get(AMDGPU::SI_END_CF))
688 .add(I.getOperand(1));
689
690 Register Reg = I.getOperand(1).getReg();
691 I.eraseFromParent();
692
693 if (!MRI.getRegClassOrNull(Reg))
694 MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
695 return true;
696 }
Matt Arsenault50be3482019-07-02 14:52:16 +0000697 default:
698 return selectImpl(I, CoverageInfo);
Tom Stellard390a5f42018-07-13 21:05:14 +0000699 }
Tom Stellard390a5f42018-07-13 21:05:14 +0000700}
701
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000702bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
703 MachineBasicBlock *BB = I.getParent();
704 MachineFunction *MF = BB->getParent();
705 MachineRegisterInfo &MRI = MF->getRegInfo();
706 const DebugLoc &DL = I.getDebugLoc();
707
708 unsigned DstReg = I.getOperand(0).getReg();
709 unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
Matt Arsenaultfdf36722019-07-01 15:42:47 +0000710 assert(Size <= 32 || Size == 64);
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000711 const MachineOperand &CCOp = I.getOperand(1);
712 unsigned CCReg = CCOp.getReg();
713 if (isSCC(CCReg, MRI)) {
Matt Arsenaultfdf36722019-07-01 15:42:47 +0000714 unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
715 AMDGPU::S_CSELECT_B32;
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000716 MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
717 .addReg(CCReg);
718
719 // The generic constrainSelectedInstRegOperands doesn't work for the scc register
720 // bank, because it does not cover the register class that we used to represent
721 // for it. So we need to manually set the register class here.
722 if (!MRI.getRegClassOrNull(CCReg))
723 MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
724 MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
725 .add(I.getOperand(2))
726 .add(I.getOperand(3));
727
728 bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
729 constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
730 I.eraseFromParent();
731 return Ret;
732 }
733
Matt Arsenaultfdf36722019-07-01 15:42:47 +0000734 // Wide VGPR select should have been split in RegBankSelect.
735 if (Size > 32)
736 return false;
737
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000738 MachineInstr *Select =
739 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
740 .addImm(0)
741 .add(I.getOperand(3))
742 .addImm(0)
743 .add(I.getOperand(2))
744 .add(I.getOperand(1));
745
746 bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
747 I.eraseFromParent();
748 return Ret;
749}
750
Tom Stellardca166212017-01-30 21:56:46 +0000751bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
752 MachineBasicBlock *BB = I.getParent();
Tom Stellard655fdd32018-05-11 23:12:49 +0000753 MachineFunction *MF = BB->getParent();
754 MachineRegisterInfo &MRI = MF->getRegInfo();
Tom Stellardca166212017-01-30 21:56:46 +0000755 DebugLoc DL = I.getDebugLoc();
Matt Arsenault89fc8bc2019-07-01 13:37:39 +0000756 unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
757 if (PtrSize != 64) {
758 LLVM_DEBUG(dbgs() << "Unhandled address space\n");
759 return false;
760 }
761
Tom Stellard655fdd32018-05-11 23:12:49 +0000762 unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
763 unsigned Opcode;
Tom Stellardca166212017-01-30 21:56:46 +0000764
765 // FIXME: Select store instruction based on address space
Tom Stellard655fdd32018-05-11 23:12:49 +0000766 switch (StoreSize) {
767 default:
768 return false;
769 case 32:
770 Opcode = AMDGPU::FLAT_STORE_DWORD;
771 break;
772 case 64:
773 Opcode = AMDGPU::FLAT_STORE_DWORDX2;
774 break;
775 case 96:
776 Opcode = AMDGPU::FLAT_STORE_DWORDX3;
777 break;
778 case 128:
779 Opcode = AMDGPU::FLAT_STORE_DWORDX4;
780 break;
781 }
782
783 MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
Tom Stellardca166212017-01-30 21:56:46 +0000784 .add(I.getOperand(1))
785 .add(I.getOperand(0))
Matt Arsenaultfd023142017-06-12 15:55:58 +0000786 .addImm(0) // offset
787 .addImm(0) // glc
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +0000788 .addImm(0) // slc
789 .addImm(0); // dlc
Tom Stellardca166212017-01-30 21:56:46 +0000790
Matt Arsenault47ccafe2017-05-11 17:38:33 +0000791
Tom Stellardca166212017-01-30 21:56:46 +0000792 // Now that we selected an opcode, we need to constrain the register
793 // operands to use appropriate classes.
794 bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
795
796 I.eraseFromParent();
797 return Ret;
798}
799
Matt Arsenaultdbb6c032019-06-24 18:02:18 +0000800static int sizeToSubRegIndex(unsigned Size) {
801 switch (Size) {
802 case 32:
803 return AMDGPU::sub0;
804 case 64:
805 return AMDGPU::sub0_sub1;
806 case 96:
807 return AMDGPU::sub0_sub1_sub2;
808 case 128:
809 return AMDGPU::sub0_sub1_sub2_sub3;
810 case 256:
811 return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
812 default:
813 if (Size < 32)
814 return AMDGPU::sub0;
815 if (Size > 256)
816 return -1;
817 return sizeToSubRegIndex(PowerOf2Ceil(Size));
818 }
819}
820
// Select G_TRUNC by turning it into a COPY, reading from a sub-register of
// the source when the source is wider than 32 bits.
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  // A truncate across register banks would need a real cross-bank copy; bail.
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    // Wide source: read only the low DstSize bits via a sub-register index.
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    // Must be set before the registers are constrained below.
    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  // Mutate the G_TRUNC in place into a plain COPY.
  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}
869
Matt Arsenault5dafcb92019-07-01 13:22:06 +0000870/// \returns true if a bitmask for \p Size bits will be an inline immediate.
871static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
872 Mask = maskTrailingOnes<unsigned>(Size);
873 int SignedMask = static_cast<int>(Mask);
874 return SignedMask >= -16 && SignedMask <= 64;
875}
876
// Select G_SEXT / G_ZEXT / G_ANYEXT. The expansion depends on the source's
// register bank:
//  - SCC:  copy into SCC and materialize 0 / +-1 with S_CSELECT.
//  - VCC:  materialize 0 / +-1 per lane with V_CNDMASK.
//  - VGPR: V_AND with an inline-immediate mask, or V_BFE.
//  - SGPR: S_SEXT / S_AND / S_BFE, with a REG_SEQUENCE for 64-bit results.
// NOTE: on success the original instruction is NOT erased here; the caller
// (select()) erases it.
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    // Per-lane select of 0 or +-1 on the vcc condition.
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  // Any-extend of a plain VGPR/SGPR value: the high bits are don't-care.
  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0)        // Offset
      .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      // Dedicated scalar sign-extend instructions for byte / halfword.
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16); // offset 0, width SrcSize

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      // Zero-extend with an inline-immediate AND when possible.
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16); // offset 0, width SrcSize
    }

    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}
1011
Tom Stellardca166212017-01-30 21:56:46 +00001012bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
1013 MachineBasicBlock *BB = I.getParent();
1014 MachineFunction *MF = BB->getParent();
1015 MachineRegisterInfo &MRI = MF->getRegInfo();
Tom Stellarde182b282018-05-15 17:57:09 +00001016 MachineOperand &ImmOp = I.getOperand(1);
Tom Stellardca166212017-01-30 21:56:46 +00001017
Tom Stellarde182b282018-05-15 17:57:09 +00001018 // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
1019 if (ImmOp.isFPImm()) {
1020 const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
1021 ImmOp.ChangeToImmediate(Imm.getZExtValue());
1022 } else if (ImmOp.isCImm()) {
1023 ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
1024 }
1025
1026 unsigned DstReg = I.getOperand(0).getReg();
1027 unsigned Size;
1028 bool IsSgpr;
1029 const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
1030 if (RB) {
1031 IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
1032 Size = MRI.getType(DstReg).getSizeInBits();
1033 } else {
1034 const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
1035 IsSgpr = TRI.isSGPRClass(RC);
Tom Stellarda91ce172018-05-21 17:49:31 +00001036 Size = TRI.getRegSizeInBits(*RC);
Tom Stellarde182b282018-05-15 17:57:09 +00001037 }
1038
1039 if (Size != 32 && Size != 64)
1040 return false;
1041
1042 unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
Tom Stellardca166212017-01-30 21:56:46 +00001043 if (Size == 32) {
Tom Stellarde182b282018-05-15 17:57:09 +00001044 I.setDesc(TII.get(Opcode));
1045 I.addImplicitDefUseOperands(*MF);
Tom Stellardca166212017-01-30 21:56:46 +00001046 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1047 }
1048
Tom Stellardca166212017-01-30 21:56:46 +00001049 DebugLoc DL = I.getDebugLoc();
Tom Stellarde182b282018-05-15 17:57:09 +00001050 const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
1051 &AMDGPU::VGPR_32RegClass;
1052 unsigned LoReg = MRI.createVirtualRegister(RC);
1053 unsigned HiReg = MRI.createVirtualRegister(RC);
1054 const APInt &Imm = APInt(Size, I.getOperand(1).getImm());
Tom Stellardca166212017-01-30 21:56:46 +00001055
Tom Stellarde182b282018-05-15 17:57:09 +00001056 BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
Tom Stellardca166212017-01-30 21:56:46 +00001057 .addImm(Imm.trunc(32).getZExtValue());
1058
Tom Stellarde182b282018-05-15 17:57:09 +00001059 BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
Tom Stellardca166212017-01-30 21:56:46 +00001060 .addImm(Imm.ashr(32).getZExtValue());
1061
Tom Stellarde182b282018-05-15 17:57:09 +00001062 const MachineInstr *RS =
1063 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
1064 .addReg(LoReg)
1065 .addImm(AMDGPU::sub0)
1066 .addReg(HiReg)
1067 .addImm(AMDGPU::sub1);
1068
Tom Stellardca166212017-01-30 21:56:46 +00001069 // We can't call constrainSelectedInstRegOperands here, because it doesn't
1070 // work for target independent opcodes
1071 I.eraseFromParent();
Tom Stellarde182b282018-05-15 17:57:09 +00001072 const TargetRegisterClass *DstRC =
1073 TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
1074 if (!DstRC)
1075 return true;
1076 return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
Tom Stellardca166212017-01-30 21:56:46 +00001077}
1078
1079static bool isConstant(const MachineInstr &MI) {
1080 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
1081}
1082
1083void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
1084 const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
1085
1086 const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());
1087
1088 assert(PtrMI);
1089
1090 if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
1091 return;
1092
1093 GEPInfo GEPInfo(*PtrMI);
1094
1095 for (unsigned i = 1, e = 3; i < e; ++i) {
1096 const MachineOperand &GEPOp = PtrMI->getOperand(i);
1097 const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
1098 assert(OpDef);
1099 if (isConstant(*OpDef)) {
1100 // FIXME: Is it possible to have multiple Imm parts? Maybe if we
1101 // are lacking other optimizations.
1102 assert(GEPInfo.Imm == 0);
1103 GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
1104 continue;
1105 }
1106 const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
1107 if (OpBank->getID() == AMDGPU::SGPRRegBankID)
1108 GEPInfo.SgprParts.push_back(GEPOp.getReg());
1109 else
1110 GEPInfo.VgprParts.push_back(GEPOp.getReg());
1111 }
1112
1113 AddrInfo.push_back(GEPInfo);
1114 getAddrModeInfo(*PtrMI, MRI, AddrInfo);
1115}
1116
Tom Stellard79b5c382019-02-20 21:02:37 +00001117bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
Tom Stellardca166212017-01-30 21:56:46 +00001118 if (!MI.hasOneMemOperand())
1119 return false;
1120
1121 const MachineMemOperand *MMO = *MI.memoperands_begin();
1122 const Value *Ptr = MMO->getValue();
1123
1124 // UndefValue means this is a load of a kernel input. These are uniform.
1125 // Sometimes LDS instructions have constant pointers.
1126 // If Ptr is null, then that means this mem operand contains a
1127 // PseudoSourceValue like GOT.
1128 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
1129 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
1130 return true;
1131
Matt Arsenault923712b2018-02-09 16:57:57 +00001132 if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
1133 return true;
1134
Tom Stellardca166212017-01-30 21:56:46 +00001135 const Instruction *I = dyn_cast<Instruction>(Ptr);
1136 return I && I->getMetadata("amdgpu.uniform");
1137}
1138
Tom Stellardca166212017-01-30 21:56:46 +00001139bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
1140 for (const GEPInfo &GEPInfo : AddrInfo) {
1141 if (!GEPInfo.VgprParts.empty())
1142 return true;
1143 }
1144 return false;
1145}
1146
Tom Stellardca166212017-01-30 21:56:46 +00001147bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
1148 MachineBasicBlock *BB = I.getParent();
1149 MachineFunction *MF = BB->getParent();
1150 MachineRegisterInfo &MRI = MF->getRegInfo();
Matt Arsenaulta3107272019-07-01 16:36:39 +00001151 const DebugLoc &DL = I.getDebugLoc();
1152 Register DstReg = I.getOperand(0).getReg();
1153 Register PtrReg = I.getOperand(1).getReg();
Tom Stellardca166212017-01-30 21:56:46 +00001154 unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
1155 unsigned Opcode;
1156
Matt Arsenaulta3107272019-07-01 16:36:39 +00001157 if (MRI.getType(I.getOperand(1).getReg()).getSizeInBits() == 32) {
1158 LLVM_DEBUG(dbgs() << "Unhandled address space\n");
1159 return false;
1160 }
1161
Tom Stellardca166212017-01-30 21:56:46 +00001162 SmallVector<GEPInfo, 4> AddrInfo;
1163
1164 getAddrModeInfo(I, MRI, AddrInfo);
1165
Tom Stellardca166212017-01-30 21:56:46 +00001166 switch (LoadSize) {
Tom Stellardca166212017-01-30 21:56:46 +00001167 case 32:
1168 Opcode = AMDGPU::FLAT_LOAD_DWORD;
1169 break;
1170 case 64:
1171 Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
1172 break;
Matt Arsenaulta3107272019-07-01 16:36:39 +00001173 default:
1174 LLVM_DEBUG(dbgs() << "Unhandled load size\n");
1175 return false;
Tom Stellardca166212017-01-30 21:56:46 +00001176 }
1177
1178 MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
1179 .add(I.getOperand(0))
1180 .addReg(PtrReg)
Matt Arsenaultfd023142017-06-12 15:55:58 +00001181 .addImm(0) // offset
1182 .addImm(0) // glc
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001183 .addImm(0) // slc
1184 .addImm(0); // dlc
Tom Stellardca166212017-01-30 21:56:46 +00001185
1186 bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
1187 I.eraseFromParent();
1188 return Ret;
1189}
1190
// Select G_BRCOND into a conditional branch: S_CBRANCH_SCC1 when the
// condition is in the SCC bank, S_CBRANCH_VCCNZ when it is a VCC lane mask.
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know that a VCC producer based on the register bank, that ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  // Give the virtual condition register a concrete class if it doesn't have
  // one yet, so the copy into the physical register below is well-formed.
  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}
1235
Matt Arsenaultcda82f02019-07-01 15:48:18 +00001236bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
1237 MachineBasicBlock *BB = I.getParent();
1238 MachineFunction *MF = BB->getParent();
1239 MachineRegisterInfo &MRI = MF->getRegInfo();
1240
1241 Register DstReg = I.getOperand(0).getReg();
1242 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
1243 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
1244 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
1245 if (IsVGPR)
1246 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
1247
1248 return RBI.constrainGenericRegister(
1249 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
1250}
1251
// Main entry point: select a single generic MachineInstr into a target
// instruction. Returns true when the instruction was successfully selected.
bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {
  if (I.isPHI())
    return selectPHI(I);

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    // Already target-specific; only COPYs still need register-class fixup.
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    // Try the manual path first; on failure fall through to the imported
    // TableGen patterns.
    if (selectG_ADD_SUB(I))
      return true;
    LLVM_FALLTHROUGH;
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_LOAD:
    // Imported patterns (e.g. SMRD forms) first, manual FLAT load second.
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    // selectG_SZA_EXT leaves the original instruction in place; erase it
    // here on success.
    if (selectG_SZA_EXT(I)) {
      I.eraseFromParent();
      return true;
    }

    return false;
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  case TargetOpcode::G_FENCE:
    // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
    // is checking for G_CONSTANT
    I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
    return true;
  }
  return false;
}
Tom Stellard1dc90202018-05-10 20:53:06 +00001330
Tom Stellard26fac0f2018-06-22 02:54:57 +00001331InstructionSelector::ComplexRendererFns
1332AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
1333 return {{
1334 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1335 }};
1336
1337}
1338
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001339std::pair<Register, unsigned>
1340AMDGPUInstructionSelector::selectVOP3ModsImpl(
1341 Register Src, const MachineRegisterInfo &MRI) const {
1342 unsigned Mods = 0;
1343 MachineInstr *MI = MRI.getVRegDef(Src);
1344
1345 if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
1346 Src = MI->getOperand(1).getReg();
1347 Mods |= SISrcMods::NEG;
1348 MI = MRI.getVRegDef(Src);
1349 }
1350
1351 if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
1352 Src = MI->getOperand(1).getReg();
1353 Mods |= SISrcMods::ABS;
1354 }
1355
1356 return std::make_pair(Src, Mods);
1357}
1358
Tom Stellard1dc90202018-05-10 20:53:06 +00001359///
1360/// This will select either an SGPR or VGPR operand and will save us from
1361/// having to write an extra tablegen pattern.
1362InstructionSelector::ComplexRendererFns
1363AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
1364 return {{
1365 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1366 }};
1367}
Tom Stellarddcc95e92018-05-11 05:44:16 +00001368
1369InstructionSelector::ComplexRendererFns
1370AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001371 MachineRegisterInfo &MRI
1372 = Root.getParent()->getParent()->getParent()->getRegInfo();
1373
1374 Register Src;
1375 unsigned Mods;
1376 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1377
Tom Stellarddcc95e92018-05-11 05:44:16 +00001378 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001379 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1380 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
1381 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1382 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
Tom Stellarddcc95e92018-05-11 05:44:16 +00001383 }};
1384}
Tom Stellard9a653572018-06-22 02:34:29 +00001385InstructionSelector::ComplexRendererFns
1386AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
1387 return {{
1388 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
1389 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1390 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
1391 }};
1392}
Tom Stellard46bbbc32018-06-13 22:30:47 +00001393
1394InstructionSelector::ComplexRendererFns
1395AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001396 MachineRegisterInfo &MRI
1397 = Root.getParent()->getParent()->getParent()->getRegInfo();
1398
1399 Register Src;
1400 unsigned Mods;
1401 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1402
Tom Stellard46bbbc32018-06-13 22:30:47 +00001403 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001404 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1405 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
Tom Stellard46bbbc32018-06-13 22:30:47 +00001406 }};
1407}
Tom Stellard79b5c382019-02-20 21:02:37 +00001408
1409InstructionSelector::ComplexRendererFns
1410AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
1411 MachineRegisterInfo &MRI =
1412 Root.getParent()->getParent()->getParent()->getRegInfo();
1413
1414 SmallVector<GEPInfo, 4> AddrInfo;
1415 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
1416
1417 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1418 return None;
1419
1420 const GEPInfo &GEPInfo = AddrInfo[0];
1421
1422 if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
1423 return None;
1424
1425 unsigned PtrReg = GEPInfo.SgprParts[0];
1426 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1427 return {{
1428 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1429 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1430 }};
1431}
1432
1433InstructionSelector::ComplexRendererFns
1434AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
1435 MachineRegisterInfo &MRI =
1436 Root.getParent()->getParent()->getParent()->getRegInfo();
1437
1438 SmallVector<GEPInfo, 4> AddrInfo;
1439 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
1440
1441 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1442 return None;
1443
1444 const GEPInfo &GEPInfo = AddrInfo[0];
1445 unsigned PtrReg = GEPInfo.SgprParts[0];
1446 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1447 if (!isUInt<32>(EncodedImm))
1448 return None;
1449
1450 return {{
1451 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1452 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1453 }};
1454}
1455
// Match an SMRD address as (SGPR base + offset materialized into an SGPR).
// Note this emits an S_MOV_B32 for the offset as a side effect of matching.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with an 32-bit immediate offset.
  // It is OK to select this using a sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  // Materialize the constant offset into a fresh SGPR ahead of the load.
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}