blob: 462158d9f973aa4a947657cfe380f5f142807cb6 [file] [log] [blame]
Tom Stellardca166212017-01-30 21:56:46 +00001//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tom Stellardca166212017-01-30 21:56:46 +00006//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AMDGPU.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUInstructionSelector.h"
15#include "AMDGPUInstrInfo.h"
16#include "AMDGPURegisterBankInfo.h"
17#include "AMDGPURegisterInfo.h"
18#include "AMDGPUSubtarget.h"
Tom Stellard1dc90202018-05-10 20:53:06 +000019#include "AMDGPUTargetMachine.h"
Matt Arsenaultb1cc4f52018-06-25 16:17:48 +000020#include "SIMachineFunctionInfo.h"
Tom Stellard44b30b42018-05-22 02:03:23 +000021#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Tom Stellard1dc90202018-05-10 20:53:06 +000022#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
23#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
Aditya Nandakumar18b3f9d2018-01-17 19:31:33 +000024#include "llvm/CodeGen/GlobalISel/Utils.h"
Tom Stellardca166212017-01-30 21:56:46 +000025#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineFunction.h"
27#include "llvm/CodeGen/MachineInstr.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/IR/Type.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/raw_ostream.h"
33
34#define DEBUG_TYPE "amdgpu-isel"
35
36using namespace llvm;
37
Tom Stellard1dc90202018-05-10 20:53:06 +000038#define GET_GLOBALISEL_IMPL
Tom Stellard5bfbae52018-07-11 20:59:01 +000039#define AMDGPUSubtarget GCNSubtarget
Tom Stellard1dc90202018-05-10 20:53:06 +000040#include "AMDGPUGenGlobalISel.inc"
41#undef GET_GLOBALISEL_IMPL
Tom Stellard5bfbae52018-07-11 20:59:01 +000042#undef AMDGPUSubtarget
Tom Stellard1dc90202018-05-10 20:53:06 +000043
// Constructor. Note the TableGen-generated predicate and temporary member
// initializers are spliced directly into the middle of the member-initializer
// list by the .inc includes below, so those #define/#include/#undef triples
// must remain exactly in this position.
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
59
60const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
Tom Stellardca166212017-01-30 21:56:46 +000061
Matt Arsenault2ab25f92019-07-01 16:06:02 +000062static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
63 if (TargetRegisterInfo::isPhysicalRegister(Reg))
64 return Reg == AMDGPU::SCC;
Tom Stellard8b1c53b2019-06-17 16:27:43 +000065
66 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
67 const TargetRegisterClass *RC =
68 RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
Matt Arsenault1daad912019-07-01 15:23:04 +000069 if (RC) {
70 if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
71 return false;
72 const LLT Ty = MRI.getType(Reg);
73 return Ty.isValid() && Ty.getSizeInBits() == 1;
74 }
Tom Stellard8b1c53b2019-06-17 16:27:43 +000075
76 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
77 return RB->getID() == AMDGPU::SCCRegBankID;
78}
79
Matt Arsenault2ab25f92019-07-01 16:06:02 +000080bool AMDGPUInstructionSelector::isVCC(Register Reg,
81 const MachineRegisterInfo &MRI) const {
82 if (TargetRegisterInfo::isPhysicalRegister(Reg))
83 return Reg == TRI.getVCC();
Matt Arsenault9f992c22019-07-01 13:22:07 +000084
85 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
86 const TargetRegisterClass *RC =
87 RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
88 if (RC) {
Matt Arsenault2ab25f92019-07-01 16:06:02 +000089 return RC->hasSuperClassEq(TRI.getBoolRC()) &&
Matt Arsenault9f992c22019-07-01 13:22:07 +000090 MRI.getType(Reg).getSizeInBits() == 1;
91 }
92
93 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
94 return RB->getID() == AMDGPU::VCCRegBankID;
95}
96
// Select a generic COPY. Most copies just become target COPY with the operands
// constrained to concrete register classes; copies out of the scc bank into a
// vcc destination are lowered to a V_CMP_NE_U32 against zero instead, since
// scc is modeled as a 32-bit sgpr while vcc is a wave-wide mask.
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  // Rewrite to a target COPY up-front; the scc->vcc path below erases I and
  // builds a compare instead.
  I.setDesc(TII.get(TargetOpcode::COPY));

  // Special case for COPY from the scc register bank.  The scc register bank
  // is modeled using 32-bit sgprs.
  const MachineOperand &Src = I.getOperand(1);
  unsigned SrcReg = Src.getReg();
  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
    unsigned DstReg = I.getOperand(0).getReg();

    // Specially handle scc->vcc copies.
    if (isVCC(DstReg, MRI)) {
      const DebugLoc &DL = I.getDebugLoc();
      // scc holds 0/1 in an sgpr; "src != 0" materializes it as a vcc mask.
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);
      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }
  }

  // Generic path: constrain every virtual register operand to the class the
  // operand requires; physical registers are already concrete.
  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}
135
// Select G_PHI to a target PHI by resolving the def to a concrete register
// class, either from an already-assigned class or from the def's register
// bank and LLT. Fails for scc-bank phis, which are not allocatable.
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    // No class assigned yet: derive one from the register bank and type.
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      // scc cannot be carried across block boundaries by a phi.
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}
173
Tom Stellardca166212017-01-30 21:56:46 +0000174MachineOperand
175AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
176 unsigned SubIdx) const {
177
178 MachineInstr *MI = MO.getParent();
179 MachineBasicBlock *BB = MO.getParent()->getParent();
180 MachineFunction *MF = BB->getParent();
181 MachineRegisterInfo &MRI = MF->getRegInfo();
182 unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
183
184 if (MO.isReg()) {
185 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
186 unsigned Reg = MO.getReg();
187 BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
188 .addReg(Reg, 0, ComposedSubIdx);
189
190 return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
191 MO.isKill(), MO.isDead(), MO.isUndef(),
192 MO.isEarlyClobber(), 0, MO.isDebug(),
193 MO.isInternalRead());
194 }
195
196 assert(MO.isImm());
197
198 APInt Imm(64, MO.getImm());
199
200 switch (SubIdx) {
201 default:
202 llvm_unreachable("do not know to split immediate with this sub index.");
203 case AMDGPU::sub0:
204 return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
205 case AMDGPU::sub1:
206 return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
207 }
208}
209
Tom Stellard390a5f42018-07-13 21:05:14 +0000210static int64_t getConstant(const MachineInstr *MI) {
211 return MI->getOperand(1).getCImm()->getSExtValue();
212}
213
Tom Stellardca166212017-01-30 21:56:46 +0000214bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
215 MachineBasicBlock *BB = I.getParent();
216 MachineFunction *MF = BB->getParent();
217 MachineRegisterInfo &MRI = MF->getRegInfo();
218 unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
219 unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
220 unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
221
222 if (Size != 64)
223 return false;
224
225 DebugLoc DL = I.getDebugLoc();
226
Tom Stellard124f5cc2017-01-31 15:24:11 +0000227 MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
228 MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));
229
Tom Stellardca166212017-01-30 21:56:46 +0000230 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
Tom Stellard124f5cc2017-01-31 15:24:11 +0000231 .add(Lo1)
232 .add(Lo2);
233
234 MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
235 MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));
Tom Stellardca166212017-01-30 21:56:46 +0000236
237 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
Tom Stellard124f5cc2017-01-31 15:24:11 +0000238 .add(Hi1)
239 .add(Hi2);
Tom Stellardca166212017-01-30 21:56:46 +0000240
241 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
242 .addReg(DstLo)
243 .addImm(AMDGPU::sub0)
244 .addReg(DstHi)
245 .addImm(AMDGPU::sub1);
246
247 for (MachineOperand &MO : I.explicit_operands()) {
248 if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
249 continue;
250 RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
251 }
252
253 I.eraseFromParent();
254 return true;
255}
256
// Select G_EXTRACT of a 32-bit-aligned slice as a subregister COPY. The
// bit offset (operand 2) must be a multiple of 32 so it maps directly to a
// sub-dword subregister channel.
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  // Offset in bits -> 32-bit channel index -> subregister index.
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg(), 0, SubReg);

  // Constrain both sides of the new COPY where a class can be derived.
  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}
278
// G_GEP (pointer add) is selected exactly like a 64-bit integer add; pointer
// arithmetic and integer arithmetic are identical at this level.
bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}
282
// Select G_IMPLICIT_DEF to the target IMPLICIT_DEF, constraining the def to a
// concrete class when one can be derived from the operand.
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  // Succeed either when no class could be derived but the def also has no
  // register bank (nothing to constrain), or when constraining succeeded.
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}
300
// Select G_INSERT of a 32-bit-aligned slice as an INSERT_SUBREG.
// NOTE(review): unlike selectG_EXTRACT there is no assert that the bit offset
// (operand 3) is a multiple of 32 — presumably callers guarantee it; confirm.
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  // Bit offset -> 32-bit channel -> subregister index for the inserted value.
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                              .addDef(I.getOperand(0).getReg())
                              .addReg(I.getOperand(1).getReg())
                              .addReg(I.getOperand(2).getReg())
                              .addImm(SubReg);

  // Constrain all virtual register operands of the new instruction.
  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}
328
// Select side-effect-free G_INTRINSIC instructions. Most supported intrinsics
// are delegated to the TableGen-generated selectImpl; kernarg_segment_ptr is
// handled manually as a COPY from the preloaded input-pointer argument.
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
                                          CodeGenCoverage &CoverageInfo) const {
  // The intrinsic ID operand follows the explicit defs.
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  default:
    break;
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);

  case Intrinsic::amdgcn_kernarg_segment_ptr: {
    MachineFunction *MF = I.getParent()->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    const ArgDescriptor *InputPtrReg;
    const TargetRegisterClass *RC;
    const DebugLoc &DL = I.getDebugLoc();

    std::tie(InputPtrReg, RC)
      = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    if (!InputPtrReg)
      report_fatal_error("missing kernarg segment ptr");

    // Copy the live-in vreg holding the kernarg segment pointer into the
    // intrinsic's result.
    BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
      .add(I.getOperand(0))
      .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
    I.eraseFromParent();
    return true;
  }
  }
  return false;
}
362
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000363static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
364 if (Size != 32 && Size != 64)
365 return -1;
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000366 switch (P) {
367 default:
368 llvm_unreachable("Unknown condition code!");
369 case CmpInst::ICMP_NE:
370 return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
371 case CmpInst::ICMP_EQ:
372 return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
373 case CmpInst::ICMP_SGT:
374 return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
375 case CmpInst::ICMP_SGE:
376 return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
377 case CmpInst::ICMP_SLT:
378 return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
379 case CmpInst::ICMP_SLE:
380 return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
381 case CmpInst::ICMP_UGT:
382 return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
383 case CmpInst::ICMP_UGE:
384 return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
385 case CmpInst::ICMP_ULT:
386 return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
387 case CmpInst::ICMP_ULE:
388 return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
389 }
390}
391
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000392int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
393 unsigned Size) const {
394 if (Size == 64) {
395 if (!STI.hasScalarCompareEq64())
396 return -1;
397
398 switch (P) {
399 case CmpInst::ICMP_NE:
400 return AMDGPU::S_CMP_LG_U64;
401 case CmpInst::ICMP_EQ:
402 return AMDGPU::S_CMP_EQ_U64;
403 default:
404 return -1;
405 }
406 }
407
408 if (Size != 32)
409 return -1;
410
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000411 switch (P) {
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000412 case CmpInst::ICMP_NE:
413 return AMDGPU::S_CMP_LG_U32;
414 case CmpInst::ICMP_EQ:
415 return AMDGPU::S_CMP_EQ_U32;
416 case CmpInst::ICMP_SGT:
417 return AMDGPU::S_CMP_GT_I32;
418 case CmpInst::ICMP_SGE:
419 return AMDGPU::S_CMP_GE_I32;
420 case CmpInst::ICMP_SLT:
421 return AMDGPU::S_CMP_LT_I32;
422 case CmpInst::ICMP_SLE:
423 return AMDGPU::S_CMP_LE_I32;
424 case CmpInst::ICMP_UGT:
425 return AMDGPU::S_CMP_GT_U32;
426 case CmpInst::ICMP_UGE:
427 return AMDGPU::S_CMP_GE_U32;
428 case CmpInst::ICMP_ULT:
429 return AMDGPU::S_CMP_LT_U32;
430 case CmpInst::ICMP_ULE:
431 return AMDGPU::S_CMP_LE_U32;
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000432 default:
433 llvm_unreachable("Unknown condition code!");
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000434 }
435}
436
// Select G_ICMP. If the result lives in the scc bank, emit a scalar S_CMP_*
// (which defines the physical SCC) followed by a copy of SCC into the result;
// otherwise emit the VALU V_CMP_*_e64 form producing a 64-bit condition mask.
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    // S_CMP defines the physical SCC; materialize it into the result vreg.
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               AMDGPU::SReg_64RegClass, MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
479
Tom Stellard390a5f42018-07-13 21:05:14 +0000480static MachineInstr *
481buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
482 unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
483 unsigned VM, bool Compr, unsigned Enabled, bool Done) {
484 const DebugLoc &DL = Insert->getDebugLoc();
485 MachineBasicBlock &BB = *Insert->getParent();
486 unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
487 return BuildMI(BB, Insert, DL, TII.get(Opcode))
488 .addImm(Tgt)
489 .addReg(Reg0)
490 .addReg(Reg1)
491 .addReg(Reg2)
492 .addReg(Reg3)
493 .addImm(VM)
494 .addImm(Compr)
495 .addImm(Enabled);
496}
497
// Select G_INTRINSIC_W_SIDE_EFFECTS. Currently handles the export intrinsics:
// amdgcn.exp (four separate sources) and amdgcn.exp.compr (two packed sources
// plus undef padding). The immediate-like arguments (tgt/en/done/vm) arrive
// as G_CONSTANT defs and are folded with getConstant().
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
                                                 MachineInstr &I,
                                                 CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    // The compressed form only supplies two sources; pad the remaining
    // operands with an IMPLICIT_DEF vgpr.
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  }
  return false;
}
542
// Select G_SELECT. With an scc condition, copy the condition into the
// physical SCC and emit S_CSELECT_B32/B64; with a vcc-style condition, emit
// V_CNDMASK_B32 (32-bit only — wider VGPR selects should already have been
// split by RegBankSelect).
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    // S_CSELECT reads the physical SCC, so first move the condition there.
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc register
    // bank, because it does not cover the register class that we used to represent
    // for it. So we need to manually set the register class here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
              .addImm(0)
              .add(I.getOperand(3))
              .addImm(0)
              .add(I.getOperand(2))
              .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
591
// Select G_STORE to a FLAT store sized by the stored value (32..128 bits).
// Only 64-bit pointers are handled; FLAT operand order is address then data.
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
  if (PtrSize != 64) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  // Operand 1 is the address, operand 0 the stored value.
  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0) // offset
          .addImm(0) // glc
          .addImm(0) // slc
          .addImm(0); // dlc


  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}
640
Matt Arsenaultdbb6c032019-06-24 18:02:18 +0000641static int sizeToSubRegIndex(unsigned Size) {
642 switch (Size) {
643 case 32:
644 return AMDGPU::sub0;
645 case 64:
646 return AMDGPU::sub0_sub1;
647 case 96:
648 return AMDGPU::sub0_sub1_sub2;
649 case 128:
650 return AMDGPU::sub0_sub1_sub2_sub3;
651 case 256:
652 return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
653 default:
654 if (Size < 32)
655 return AMDGPU::sub0;
656 if (Size > 256)
657 return -1;
658 return sizeToSubRegIndex(PowerOf2Ceil(Size));
659 }
660}
661
// Select G_TRUNC as a (possibly subregister) COPY. Source and destination
// must be scalars on the same register bank; for sources wider than 32 bits
// the low subregister covering the destination size is used.
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  // A cross-bank truncate would need a real copy, not a subreg COPY.
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}
710
Matt Arsenault5dafcb92019-07-01 13:22:06 +0000711/// \returns true if a bitmask for \p Size bits will be an inline immediate.
712static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
713 Mask = maskTrailingOnes<unsigned>(Size);
714 int SignedMask = static_cast<int>(Mask);
715 return SignedMask >= -16 && SignedMask <= 64;
716}
717
// Select G_SEXT / G_ZEXT / G_ANYEXT. The lowering depends on the source
// register bank: scc sources use S_CSELECT, vcc sources use V_CNDMASK,
// VGPR sources use V_AND/V_BFE, and SGPR sources use S_SEXT/S_AND/S_BFE
// (with an undef high half for 64-bit results). G_ANYEXT of non-condition
// sources degenerates to a plain COPY.
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    // S_CSELECT picks -1 (sext) or 1 (zext) when SCC is set, else 0.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    // Per-lane select of -1/1 vs 0 using the vcc mask.
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  // Any-extend of a non-condition source leaves the high bits undefined, so a
  // plain copy suffices.
  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    // Otherwise extract-and-extend the low SrcSize bits with a bitfield op.
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0) // Offset
      .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    // Dedicated scalar sext instructions exist for i8/i16 -> i32.
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}
852
// Select G_CONSTANT / G_FCONSTANT into S_MOV_B32 (SGPR) or V_MOV_B32_e32
// (VGPR). 64-bit immediates are split into two 32-bit moves combined with a
// REG_SEQUENCE. Sizes other than 32/64 bits are rejected.
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  // FP immediates are reinterpreted as their raw integer bit pattern.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  // Prefer the register bank when RegBankSelect has assigned one; otherwise
  // fall back to the register class already constrained on the vreg.
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    // 32-bit case: mutate the instruction in place into the target move.
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  // 64-bit case: emit two 32-bit moves (low then high half) and glue them
  // together with a REG_SEQUENCE.
  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  // Reference bound to a temporary APInt; lifetime is extended to scope end.
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}
919
920static bool isConstant(const MachineInstr &MI) {
921 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
922}
923
// Walk the chain of G_GEPs feeding \p Load's address operand and record, for
// each GEP, its constant immediate offset and the SGPR/VGPR registers that
// contribute to the address. Entries are appended nearest-the-load first,
// because each GEP is pushed before recursing on its base pointer.
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  // Operand 1 of the load (and of a G_GEP) is the pointer.
  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  // Operands 1 and 2 of G_GEP are the base pointer and the offset.
  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts? Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    // Non-constant addend: classify by register bank.
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  // Recurse through nested GEPs; PtrMI's operand 1 is its base pointer.
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}
957
Tom Stellard79b5c382019-02-20 21:02:37 +0000958bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
Tom Stellardca166212017-01-30 21:56:46 +0000959 if (!MI.hasOneMemOperand())
960 return false;
961
962 const MachineMemOperand *MMO = *MI.memoperands_begin();
963 const Value *Ptr = MMO->getValue();
964
965 // UndefValue means this is a load of a kernel input. These are uniform.
966 // Sometimes LDS instructions have constant pointers.
967 // If Ptr is null, then that means this mem operand contains a
968 // PseudoSourceValue like GOT.
969 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
970 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
971 return true;
972
Matt Arsenault923712b2018-02-09 16:57:57 +0000973 if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
974 return true;
975
Tom Stellardca166212017-01-30 21:56:46 +0000976 const Instruction *I = dyn_cast<Instruction>(Ptr);
977 return I && I->getMetadata("amdgpu.uniform");
978}
979
Tom Stellardca166212017-01-30 21:56:46 +0000980bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
981 for (const GEPInfo &GEPInfo : AddrInfo) {
982 if (!GEPInfo.VgprParts.empty())
983 return true;
984 }
985 return false;
986}
987
Tom Stellardca166212017-01-30 21:56:46 +0000988bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
989 MachineBasicBlock *BB = I.getParent();
990 MachineFunction *MF = BB->getParent();
991 MachineRegisterInfo &MRI = MF->getRegInfo();
992 DebugLoc DL = I.getDebugLoc();
993 unsigned DstReg = I.getOperand(0).getReg();
994 unsigned PtrReg = I.getOperand(1).getReg();
995 unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
996 unsigned Opcode;
997
998 SmallVector<GEPInfo, 4> AddrInfo;
999
1000 getAddrModeInfo(I, MRI, AddrInfo);
1001
Tom Stellardca166212017-01-30 21:56:46 +00001002 switch (LoadSize) {
1003 default:
1004 llvm_unreachable("Load size not supported\n");
1005 case 32:
1006 Opcode = AMDGPU::FLAT_LOAD_DWORD;
1007 break;
1008 case 64:
1009 Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
1010 break;
1011 }
1012
1013 MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
1014 .add(I.getOperand(0))
1015 .addReg(PtrReg)
Matt Arsenaultfd023142017-06-12 15:55:58 +00001016 .addImm(0) // offset
1017 .addImm(0) // glc
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +00001018 .addImm(0) // slc
1019 .addImm(0); // dlc
Tom Stellardca166212017-01-30 21:56:46 +00001020
1021 bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
1022 I.eraseFromParent();
1023 return Ret;
1024}
1025
// Select G_BRCOND. A condition living in SCC becomes S_CBRANCH_SCC1; a
// condition living in VCC becomes S_CBRANCH_VCCNZ. The virtual condition
// register is first copied into the corresponding physical register.
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know that a VCC producer based on the register bank, that ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  // Give the condition vreg a register class if RegBankSelect left it bare,
  // so the physreg copy below is legal.
  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}
1070
Matt Arsenaultcda82f02019-07-01 15:48:18 +00001071bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
1072 MachineBasicBlock *BB = I.getParent();
1073 MachineFunction *MF = BB->getParent();
1074 MachineRegisterInfo &MRI = MF->getRegInfo();
1075
1076 Register DstReg = I.getOperand(0).getReg();
1077 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
1078 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
1079 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
1080 if (IsVGPR)
1081 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
1082
1083 return RBI.constrainGenericRegister(
1084 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
1085}
1086
// Top-level instruction selection entry point. Generic (G_*) opcodes are
// dispatched either to the manual selectG_* routines or to the
// tablegen-generated selectImpl(); non-generic instructions are assumed to
// already be selected.
bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {
  if (I.isPHI())
    return selectPHI(I);

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    // Target instructions need no work; generic COPYs still need their
    // operands constrained.
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_ADD:
    if (selectG_ADD(I))
      return true;
    // Fall through to the imported tablegen patterns.
    LLVM_FALLTHROUGH;
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_LOAD:
    // Try the imported patterns first; fall back to manual FLAT selection.
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    // selectG_SZA_EXT emits the replacement but leaves the original G_*EXT
    // for the caller to erase on success.
    if (selectG_SZA_EXT(I)) {
      I.eraseFromParent();
      return true;
    }

    return false;
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  }
  return false;
}
Tom Stellard1dc90202018-05-10 20:53:06 +00001153
Tom Stellard26fac0f2018-06-22 02:54:57 +00001154InstructionSelector::ComplexRendererFns
1155AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
1156 return {{
1157 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1158 }};
1159
1160}
1161
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001162std::pair<Register, unsigned>
1163AMDGPUInstructionSelector::selectVOP3ModsImpl(
1164 Register Src, const MachineRegisterInfo &MRI) const {
1165 unsigned Mods = 0;
1166 MachineInstr *MI = MRI.getVRegDef(Src);
1167
1168 if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
1169 Src = MI->getOperand(1).getReg();
1170 Mods |= SISrcMods::NEG;
1171 MI = MRI.getVRegDef(Src);
1172 }
1173
1174 if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
1175 Src = MI->getOperand(1).getReg();
1176 Mods |= SISrcMods::ABS;
1177 }
1178
1179 return std::make_pair(Src, Mods);
1180}
1181
Tom Stellard1dc90202018-05-10 20:53:06 +00001182///
1183/// This will select either an SGPR or VGPR operand and will save us from
1184/// having to write an extra tablegen pattern.
1185InstructionSelector::ComplexRendererFns
1186AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
1187 return {{
1188 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1189 }};
1190}
Tom Stellarddcc95e92018-05-11 05:44:16 +00001191
1192InstructionSelector::ComplexRendererFns
1193AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001194 MachineRegisterInfo &MRI
1195 = Root.getParent()->getParent()->getParent()->getRegInfo();
1196
1197 Register Src;
1198 unsigned Mods;
1199 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1200
Tom Stellarddcc95e92018-05-11 05:44:16 +00001201 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001202 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1203 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
1204 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1205 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
Tom Stellarddcc95e92018-05-11 05:44:16 +00001206 }};
1207}
Tom Stellard9a653572018-06-22 02:34:29 +00001208InstructionSelector::ComplexRendererFns
1209AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
1210 return {{
1211 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
1212 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1213 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
1214 }};
1215}
Tom Stellard46bbbc32018-06-13 22:30:47 +00001216
1217InstructionSelector::ComplexRendererFns
1218AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001219 MachineRegisterInfo &MRI
1220 = Root.getParent()->getParent()->getParent()->getRegInfo();
1221
1222 Register Src;
1223 unsigned Mods;
1224 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1225
Tom Stellard46bbbc32018-06-13 22:30:47 +00001226 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001227 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1228 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
Tom Stellard46bbbc32018-06-13 22:30:47 +00001229 }};
1230}
Tom Stellard79b5c382019-02-20 21:02:37 +00001231
// Match an SMRD addressing mode of the form (single SGPR base + constant
// offset) where the offset is legal for the encoded immediate field.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  // Require exactly one SGPR base register in the innermost GEP.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  // The byte offset must be converted to the subtarget's encoded form.
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}
1255
// Match an SMRD addressing mode of the form (single SGPR base + constant
// offset) for the 32-bit literal-offset variants: the encoded offset must
// fit in an unsigned 32-bit field.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  // Require exactly one SGPR base register in the innermost GEP.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}
1278
// Match an SMRD addressing mode (SGPR base + 32-bit offset) using the _SGPR
// instruction variants: the constant offset is materialized into a fresh
// SGPR with S_MOV_B32 and passed as the soffset operand.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  // Reject zero offsets and offsets that do not fit in 32 bits.
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with an 32-bit immediate offset.
  // It is OK to select this using a sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}