//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

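// Return true if \p Reg is a virtual register assigned to the scc register
// bank, or a 1-bit value already constrained to the SReg_32_XM0 class that is
// used to model scc.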
static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
  assert(!TargetRegisterInfo::isPhysicalRegister(Reg));

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

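// Return true if \p Reg is a 1-bit value in the wave mask register class, or
// a virtual register assigned to the vcc register bank.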
static bool isVCC(unsigned Reg, const MachineRegisterInfo &MRI,
                  const SIRegisterInfo &TRI) {
  assert(!TargetRegisterInfo::isPhysicalRegister(Reg));

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    return RC == TRI.getWaveMaskRegClass() &&
           MRI.getType(Reg).getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  // Special case for COPY from the scc register bank. The scc register bank
  // is modeled using 32-bit sgprs.
  const MachineOperand &Src = I.getOperand(1);
  unsigned SrcReg = Src.getReg();
  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
    unsigned DstReg = I.getOperand(0).getReg();

    // Specially handle scc->vcc copies.
    if (isVCC(DstReg, MRI, TRI)) {
      const DebugLoc &DL = I.getDebugLoc();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);
      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }
  }

  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

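// Split a 64-bit operand for use in a pair of 32-bit instructions: copy the
// requested half of a register operand into a fresh 32-bit SGPR, or return
// the low/high half of an immediate.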
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
      .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

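// Read the value defined by a G_CONSTANT instruction.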
static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    .add(Lo1)
    .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
    .add(Hi1)
    .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                           .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                          .addDef(I.getOperand(0).getReg())
                          .addReg(I.getOperand(1).getReg())
                          .addReg(I.getOperand(2).getReg())
                          .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
                                          CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  default:
    break;
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);

  case Intrinsic::amdgcn_kernarg_segment_ptr: {
    MachineFunction *MF = I.getParent()->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    const ArgDescriptor *InputPtrReg;
    const TargetRegisterClass *RC;
    const DebugLoc &DL = I.getDebugLoc();

    std::tie(InputPtrReg, RC)
      = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    if (!InputPtrReg)
      report_fatal_error("missing kernarg segment ptr");

    BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
      .add(I.getOperand(0))
      .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
    I.eraseFromParent();
    return true;
  }
  }
  return false;
}

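// Map an integer predicate to the corresponding 32- or 64-bit VALU compare
// opcode, or -1 if the size is not handled.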
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

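// Map an integer predicate to the corresponding scalar compare opcode, or -1
// if the size/predicate combination has no SALU equivalent.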
int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
                           .add(I.getOperand(2))
                           .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               AMDGPU::SReg_64RegClass, MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

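// Build an EXP or EXP_DONE instruction from the already-selected operands.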
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
      .addImm(Tgt)
      .addReg(Reg0)
      .addReg(Reg1)
      .addReg(Reg2)
      .addReg(Reg3)
      .addImm(VM)
      .addImm(Compr)
      .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I,
    CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
                                .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class that we use
    // to represent it. So we need to manually set the register class here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
                               .add(I.getOperand(2))
                               .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .addImm(0)
          .add(I.getOperand(3))
          .addImm(0)
          .add(I.getOperand(2))
          .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
  if (PtrSize != 64) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                           .add(I.getOperand(1))
                           .add(I.getOperand(0))
                           .addImm(0)  // offset
                           .addImm(0)  // glc
                           .addImm(0)  // slc
                           .addImm(0); // dlc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

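// Return the subregister index spanning \p Size bits (rounded up to a power
// of two), or -1 for sizes over 256 bits.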
static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

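// Select G_SEXT, G_ZEXT and G_ANYEXT based on the register bank of the
// source value.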
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0)        // Offset
      .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
    .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
    .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
          .addReg(LoReg)
          .addImm(AMDGPU::sub0)
          .addReg(HiReg)
          .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

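// Walk the G_GEP chain feeding a memory instruction's address and record the
// immediate, SGPR and VGPR parts of each GEP.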
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts? Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                           .add(I.getOperand(0))
                           .addReg(PtrReg)
                           .addImm(0)  // offset
                           .addImm(0)  // glc
                           .addImm(0)  // slc
                           .addImm(0); // dlc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  if (isSCC(CondReg, MRI)) {
    // In SelectionDAG, we inspect the IR block for uniformity metadata to
    // decide whether the branch is uniform when selecting the instruction. In
    // GlobalISel, we should push that decision into RegBankSelect. Assume for
    // now RegBankSelect knows what it's doing if the branch condition is scc,
    // even though it currently does not.
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(CondReg);
    if (!MRI.getRegClassOrNull(CondReg)) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(CondOp, MRI);
      MRI.setRegClass(CondReg, RC);
    }

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_CBRANCH_SCC1))
      .addMBB(I.getOperand(1).getMBB());
    I.eraseFromParent();
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
    DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
}

bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_LOAD:
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    if (selectG_SZA_EXT(I)) {
      I.eraseFromParent();
      return true;
    }

    return false;
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};

}

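// Peel off G_FNEG/G_FABS feeding \p Src and translate them into VOP3 source
// modifier bits.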
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(
  Register Src, const MachineRegisterInfo &MRI) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI.getVRegDef(Src);

  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = MRI.getVRegDef(Src);
  }

  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  return std::make_pair(Src, Mods);
}

///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using a sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}