//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

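/// Return true if \p Reg is a virtual register holding a 1-bit value assigned
/// to the SCC register bank, or to the 32-bit scalar register class used to
/// model SCC values.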
static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
  assert(!TargetRegisterInfo::isPhysicalRegister(Reg));

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

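/// Return true if \p Reg is a virtual register holding a 1-bit value in the
/// VCC register bank or in the wave mask register class.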
static bool isVCC(unsigned Reg, const MachineRegisterInfo &MRI,
                  const SIRegisterInfo &TRI) {
  assert(!TargetRegisterInfo::isPhysicalRegister(Reg));

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    return RC == TRI.getWaveMaskRegClass() &&
           MRI.getType(Reg).getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

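/// Lower a generic COPY, constraining its register operands. Copies from the
/// SCC bank into VCC are expanded to a V_CMP_NE_U32 against zero.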
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  // Special case for COPY from the scc register bank. The scc register bank
  // is modeled using 32-bit sgprs.
  const MachineOperand &Src = I.getOperand(1);
  unsigned SrcReg = Src.getReg();
  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
    unsigned DstReg = I.getOperand(0).getReg();

    // Specially handle scc->vcc copies.
    if (isVCC(DstReg, MRI, TRI)) {
      const DebugLoc &DL = I.getDebugLoc();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);
      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }
  }

  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

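/// Split a 64-bit register or immediate operand, returning a new operand that
/// refers to the 32-bit half selected by \p SubIdx (sub0 or sub1).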
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

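/// Select a 64-bit scalar G_ADD as an S_ADD_U32/S_ADDC_U32 pair followed by a
/// REG_SEQUENCE that reassembles the two halves.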
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
          .add(Lo1)
          .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
          .add(Hi1)
          .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
          .addReg(DstLo)
          .addImm(AMDGPU::sub0)
          .addReg(DstHi)
          .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

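/// Select G_EXTRACT with a 32-bit aligned offset as a subregister copy.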
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                              .addDef(I.getOperand(0).getReg())
                              .addReg(I.getOperand(1).getReg())
                              .addReg(I.getOperand(2).getReg())
                              .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
                                          CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  default:
    break;
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);

  case Intrinsic::amdgcn_kernarg_segment_ptr: {
    MachineFunction *MF = I.getParent()->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    const ArgDescriptor *InputPtrReg;
    const TargetRegisterClass *RC;
    const DebugLoc &DL = I.getDebugLoc();

    std::tie(InputPtrReg, RC)
      = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    if (!InputPtrReg)
      report_fatal_error("missing kernarg segment ptr");

    BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
      .add(I.getOperand(0))
      .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
    I.eraseFromParent();
    return true;
  }
  }
  return false;
}

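/// Map an integer predicate and operand size to the corresponding VALU compare
/// opcode, or return -1 if the size is not 32 or 64 bits.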
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

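/// Map an integer predicate and operand size to the corresponding scalar
/// compare opcode, or return -1 if no scalar form is available (64-bit
/// compares other than eq/ne, or subtargets without 64-bit scalar compares).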
int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

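/// Select G_ICMP either as a scalar S_CMP writing SCC or as a VALU V_CMP
/// writing a 64-bit condition register, depending on the bank of the result.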
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               AMDGPU::SReg_64RegClass, MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

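/// Build an EXP or EXP_DONE export instruction with the given target, source
/// registers, and control bits.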
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I,
    CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  }
  return false;
}

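/// Select G_SELECT as S_CSELECT_B32/B64 when the condition is in the SCC bank,
/// otherwise as a 32-bit V_CNDMASK.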
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size == 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 32 ? AMDGPU::S_CSELECT_B32 :
                                         AMDGPU::S_CSELECT_B64;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class that we use
    // to represent it. So we need to manually set the register class here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  assert(Size == 32);
  // FIXME: Support 64-bit select
  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .add(I.getOperand(3))
        .addImm(0)
        .add(I.getOperand(2))
        .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

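/// Select G_STORE as a FLAT store sized by the value being stored. Only 64-bit
/// pointers are handled here.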
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
  if (PtrSize != 64) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

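/// Select G_TRUNC as a (possibly subregister) COPY between register classes on
/// the same register bank.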
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

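/// Select G_SEXT, G_ZEXT and G_ANYEXT based on the bank of the source:
/// SCC/VCC sources become conditional selects, VGPR sources use V_BFE or a
/// masking AND, and SGPR sources use S_SEXT/S_BFE/S_AND.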
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0)        // Offset
      .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

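/// Select G_CONSTANT/G_FCONSTANT as S_MOV_B32 or V_MOV_B32 moves; 64-bit
/// values are split into two 32-bit moves and a REG_SEQUENCE.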
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

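/// Walk the G_GEP chain feeding a memory access and record, for each GEP, the
/// constant offset plus the SGPR and VGPR address components.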
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

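/// Return true if the memory access is known to be uniform, based on its
/// pointer value, address space, and metadata.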
bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

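/// Fallback selection of G_LOAD as a FLAT load when no imported pattern
/// matched.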
bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                               .add(I.getOperand(0))
                               .addReg(PtrReg)
                               .addImm(0)  // offset
                               .addImm(0)  // glc
                               .addImm(0)  // slc
                               .addImm(0); // dlc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

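/// Main entry point: dispatch a generic instruction either to one of the
/// manual selection routines above or to the TableGen-generated selectImpl().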
bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_LOAD:
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    if (selectG_SZA_EXT(I)) {
      I.eraseFromParent();
      return true;
    }

    return false;
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

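/// Fold G_FNEG and G_FABS feeding \p Src into VOP3 source modifiers, returning
/// the underlying source register and the accumulated modifier bits.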
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(
  Register Src, const MachineRegisterInfo &MRI) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI.getVRegDef(Src);

  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = MRI.getVRegDef(Src);
  }

  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  return std::make_pair(Src, Mods);
}

///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

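/// Complex pattern matchers for SMRD addressing modes: a base SGPR pointer
/// plus either an encoded immediate offset, a 32-bit immediate offset, or an
/// offset materialized into an SGPR.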
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using an sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}