blob: a084b5d31dabd6be125d48fb8a7e7a7dd1cfccee [file] [log] [blame]
Tom Stellardca166212017-01-30 21:56:46 +00001//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tom Stellardca166212017-01-30 21:56:46 +00006//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AMDGPU.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUInstructionSelector.h"
15#include "AMDGPUInstrInfo.h"
16#include "AMDGPURegisterBankInfo.h"
17#include "AMDGPURegisterInfo.h"
18#include "AMDGPUSubtarget.h"
Tom Stellard1dc90202018-05-10 20:53:06 +000019#include "AMDGPUTargetMachine.h"
Matt Arsenaultb1cc4f52018-06-25 16:17:48 +000020#include "SIMachineFunctionInfo.h"
Tom Stellard44b30b42018-05-22 02:03:23 +000021#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Tom Stellard1dc90202018-05-10 20:53:06 +000022#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
23#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
Aditya Nandakumar18b3f9d2018-01-17 19:31:33 +000024#include "llvm/CodeGen/GlobalISel/Utils.h"
Tom Stellardca166212017-01-30 21:56:46 +000025#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineFunction.h"
27#include "llvm/CodeGen/MachineInstr.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/IR/Type.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/raw_ostream.h"
33
34#define DEBUG_TYPE "amdgpu-isel"
35
36using namespace llvm;
37
Tom Stellard1dc90202018-05-10 20:53:06 +000038#define GET_GLOBALISEL_IMPL
Tom Stellard5bfbae52018-07-11 20:59:01 +000039#define AMDGPUSubtarget GCNSubtarget
Tom Stellard1dc90202018-05-10 20:53:06 +000040#include "AMDGPUGenGlobalISel.inc"
41#undef GET_GLOBALISEL_IMPL
Tom Stellard5bfbae52018-07-11 20:59:01 +000042#undef AMDGPUSubtarget
Tom Stellard1dc90202018-05-10 20:53:06 +000043
// Construct the selector for one subtarget/register-bank-info/target-machine
// triple.  The repeated AMDGPUGenGlobalISel.inc includes expand the
// TableGen-generated member initializers for the imported-pattern predicates
// and the temporary registers used by selectImpl(); they must stay exactly
// where they are in the initializer list.
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
59
// Name reported for this selector in debug output; reuses the pass's
// DEBUG_TYPE ("amdgpu-isel").
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
Tom Stellardca166212017-01-30 21:56:46 +000061
Matt Arsenault2ab25f92019-07-01 16:06:02 +000062static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
63 if (TargetRegisterInfo::isPhysicalRegister(Reg))
64 return Reg == AMDGPU::SCC;
Tom Stellard8b1c53b2019-06-17 16:27:43 +000065
66 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
67 const TargetRegisterClass *RC =
68 RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
Matt Arsenault1daad912019-07-01 15:23:04 +000069 if (RC) {
70 if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
71 return false;
72 const LLT Ty = MRI.getType(Reg);
73 return Ty.isValid() && Ty.getSizeInBits() == 1;
74 }
Tom Stellard8b1c53b2019-06-17 16:27:43 +000075
76 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
77 return RB->getID() == AMDGPU::SCCRegBankID;
78}
79
Matt Arsenault2ab25f92019-07-01 16:06:02 +000080bool AMDGPUInstructionSelector::isVCC(Register Reg,
81 const MachineRegisterInfo &MRI) const {
82 if (TargetRegisterInfo::isPhysicalRegister(Reg))
83 return Reg == TRI.getVCC();
Matt Arsenault9f992c22019-07-01 13:22:07 +000084
85 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
86 const TargetRegisterClass *RC =
87 RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
88 if (RC) {
Matt Arsenault2ab25f92019-07-01 16:06:02 +000089 return RC->hasSuperClassEq(TRI.getBoolRC()) &&
Matt Arsenault9f992c22019-07-01 13:22:07 +000090 MRI.getType(Reg).getSizeInBits() == 1;
91 }
92
93 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
94 return RB->getID() == AMDGPU::VCCRegBankID;
95}
96
// Select a generic COPY.  Most copies are kept as COPY and only have their
// virtual operands constrained; the one special case is a copy out of the
// scc bank into a vcc-like register, which must be materialized with a
// compare.
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  // Special case for COPY from the scc register bank.  The scc register bank
  // is modeled using 32-bit sgprs.
  const MachineOperand &Src = I.getOperand(1);
  unsigned SrcReg = Src.getReg();
  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
    unsigned DstReg = I.getOperand(0).getReg();

    // Specially handle scc->vcc copies: produce the wave mask with
    // V_CMP_NE_U32_e64 (sgpr != 0) instead of a plain register copy.
    if (isVCC(DstReg, MRI)) {
      const DebugLoc &DL = I.getDebugLoc();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);
      // The compare needs the source constrained to a class; give it one if
      // it does not have one yet.
      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }
  }

  // Ordinary copy: constrain every virtual register operand to a legal class
  // (operands whose class cannot be determined are left untouched).
  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}
135
/// Produce a 32-bit operand for the half of the 64-bit operand \p MO selected
/// by \p SubIdx (AMDGPU::sub0 = low 32 bits, AMDGPU::sub1 = high 32 bits).
///
/// Register operands are lowered to a COPY of the composed subregister into a
/// fresh SGPR_32, returned with the original operand's flags preserved.
/// Immediate operands are split arithmetically and returned as a new
/// immediate.  Any other operand kind asserts.
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  // FIXME: DstReg is also created on the immediate path below, where it is
  // never used, leaving a dead virtual register behind.
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    // Fold any subregister index already present on the operand into the
    // requested half.
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}
171
Tom Stellard390a5f42018-07-13 21:05:14 +0000172static int64_t getConstant(const MachineInstr *MI) {
173 return MI->getOperand(1).getCImm()->getSExtValue();
174}
175
Tom Stellardca166212017-01-30 21:56:46 +0000176bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
177 MachineBasicBlock *BB = I.getParent();
178 MachineFunction *MF = BB->getParent();
179 MachineRegisterInfo &MRI = MF->getRegInfo();
180 unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
181 unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
182 unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
183
184 if (Size != 64)
185 return false;
186
187 DebugLoc DL = I.getDebugLoc();
188
Tom Stellard124f5cc2017-01-31 15:24:11 +0000189 MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
190 MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));
191
Tom Stellardca166212017-01-30 21:56:46 +0000192 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
Tom Stellard124f5cc2017-01-31 15:24:11 +0000193 .add(Lo1)
194 .add(Lo2);
195
196 MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
197 MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));
Tom Stellardca166212017-01-30 21:56:46 +0000198
199 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
Tom Stellard124f5cc2017-01-31 15:24:11 +0000200 .add(Hi1)
201 .add(Hi2);
Tom Stellardca166212017-01-30 21:56:46 +0000202
203 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
204 .addReg(DstLo)
205 .addImm(AMDGPU::sub0)
206 .addReg(DstHi)
207 .addImm(AMDGPU::sub1);
208
209 for (MachineOperand &MO : I.explicit_operands()) {
210 if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
211 continue;
212 RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
213 }
214
215 I.eraseFromParent();
216 return true;
217}
218
// Select G_EXTRACT as a subregister copy.  Only 32-bit-aligned bit offsets
// are supported (asserted); the offset is converted into a per-channel
// subregister index.
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg(), 0, SubReg);

  // Constrain both register operands of the new copy where a class can be
  // determined.
  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}
240
// G_GEP is plain pointer arithmetic here, so reuse the 64-bit add selection.
bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}
244
// Select G_IMPLICIT_DEF by rewriting it in place to the target-independent
// IMPLICIT_DEF, constraining the result register when a class is known.
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    // Mutate the opcode in place; no new instructions are created.
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}
262
// Select G_INSERT as an INSERT_SUBREG, converting the 32-bit-aligned bit
// offset (operand 3) into a per-channel subregister index.
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                              .addDef(I.getOperand(0).getReg())
                              .addReg(I.getOperand(1).getReg())
                              .addReg(I.getOperand(2).getReg())
                              .addImm(SubReg);

  // Constrain each virtual register operand of the INSERT_SUBREG to a legal
  // class when one can be determined.
  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}
290
// Select side-effect-free G_INTRINSICs.  Most are delegated to the
// TableGen-generated selectImpl(); kernarg_segment_ptr is handled manually
// by copying from the preloaded input-pointer argument register.
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
                                          CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  default:
    break;
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);

  case Intrinsic::amdgcn_kernarg_segment_ptr: {
    MachineFunction *MF = I.getParent()->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    const ArgDescriptor *InputPtrReg;
    const TargetRegisterClass *RC;
    const DebugLoc &DL = I.getDebugLoc();

    std::tie(InputPtrReg, RC)
      = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    if (!InputPtrReg)
      report_fatal_error("missing kernarg segment ptr");

    // Copy the live-in virtual register holding the kernarg segment pointer
    // into the intrinsic's result.
    BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
      .add(I.getOperand(0))
      .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
    I.eraseFromParent();
    return true;
  }
  }
  return false;
}
324
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000325static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
326 if (Size != 32 && Size != 64)
327 return -1;
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000328 switch (P) {
329 default:
330 llvm_unreachable("Unknown condition code!");
331 case CmpInst::ICMP_NE:
332 return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
333 case CmpInst::ICMP_EQ:
334 return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
335 case CmpInst::ICMP_SGT:
336 return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
337 case CmpInst::ICMP_SGE:
338 return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
339 case CmpInst::ICMP_SLT:
340 return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
341 case CmpInst::ICMP_SLE:
342 return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
343 case CmpInst::ICMP_UGT:
344 return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
345 case CmpInst::ICMP_UGE:
346 return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
347 case CmpInst::ICMP_ULT:
348 return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
349 case CmpInst::ICMP_ULE:
350 return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
351 }
352}
353
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000354int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
355 unsigned Size) const {
356 if (Size == 64) {
357 if (!STI.hasScalarCompareEq64())
358 return -1;
359
360 switch (P) {
361 case CmpInst::ICMP_NE:
362 return AMDGPU::S_CMP_LG_U64;
363 case CmpInst::ICMP_EQ:
364 return AMDGPU::S_CMP_EQ_U64;
365 default:
366 return -1;
367 }
368 }
369
370 if (Size != 32)
371 return -1;
372
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000373 switch (P) {
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000374 case CmpInst::ICMP_NE:
375 return AMDGPU::S_CMP_LG_U32;
376 case CmpInst::ICMP_EQ:
377 return AMDGPU::S_CMP_EQ_U32;
378 case CmpInst::ICMP_SGT:
379 return AMDGPU::S_CMP_GT_I32;
380 case CmpInst::ICMP_SGE:
381 return AMDGPU::S_CMP_GE_I32;
382 case CmpInst::ICMP_SLT:
383 return AMDGPU::S_CMP_LT_I32;
384 case CmpInst::ICMP_SLE:
385 return AMDGPU::S_CMP_LE_I32;
386 case CmpInst::ICMP_UGT:
387 return AMDGPU::S_CMP_GT_U32;
388 case CmpInst::ICMP_UGE:
389 return AMDGPU::S_CMP_GE_U32;
390 case CmpInst::ICMP_ULT:
391 return AMDGPU::S_CMP_LT_U32;
392 case CmpInst::ICMP_ULE:
393 return AMDGPU::S_CMP_LE_U32;
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000394 default:
395 llvm_unreachable("Unknown condition code!");
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000396 }
397}
398
// Select G_ICMP.  If the result lives on the scc bank, emit an S_CMP_* and
// copy scc into the result register; otherwise emit the corresponding
// V_CMP_*_e64 writing a 64-bit condition mask.
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();

  // Comparison width is taken from the source operand, not the 1-bit result.
  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    // S_CMP writes scc implicitly; copy it into the virtual result.
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  // The VALU compare defines a 64-bit condition mask.
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               AMDGPU::SReg_64RegClass, MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
441
/// Build an export instruction before \p Insert.  \p Done selects EXP_DONE
/// over EXP; the remaining parameters are emitted in the operand order the
/// export instructions expect (target, four sources, vm, compr, enable mask).
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}
459
// Select G_INTRINSIC_W_SIDE_EFFECTS.  Handles the export intrinsics by
// reading their immediate arguments out of the defining G_CONSTANTs and
// emitting EXP/EXP_DONE; everything else is rejected.
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
                                                 MachineInstr &I,
                                                 CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    // Immediate operands arrive as virtual registers defined by G_CONSTANT.
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    // The compressed export still takes four source operands; fill the unused
    // pair with an IMPLICIT_DEF value.
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true,  Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  }
  return false;
}
504
// Select G_SELECT.  An scc condition becomes S_CSELECT_B32/B64 (after copying
// the condition into the physical SCC); otherwise a 32-bit-or-smaller select
// becomes V_CNDMASK_B32_e64.  Wide (>32-bit) VGPR selects are rejected.
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    // S_CSELECT reads the physical SCC, so move the condition there first.
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc register
    // bank, because it does not cover the register class that we used to represent
    // for it. So we need to manually set the register class here.
    if (!MRI.getRegClassOrNull(CCReg))
        MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    // Deliberate non-short-circuit '|' so both instructions get constrained.
    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
              .addImm(0)
              .add(I.getOperand(3))
              .addImm(0)
              .add(I.getOperand(2))
              .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
553
// Select G_STORE as a FLAT store sized by the stored value (32..128 bits).
// Only 64-bit (flat) pointers are handled.
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
  if (PtrSize != 64) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  // Note the operand order: G_STORE is (value, addr) but the FLAT store is
  // built address-first, then data, then the modifier immediates.
  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0) // offset
          .addImm(0) // glc
          .addImm(0) // slc
          .addImm(0); // dlc


  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}
602
Matt Arsenaultdbb6c032019-06-24 18:02:18 +0000603static int sizeToSubRegIndex(unsigned Size) {
604 switch (Size) {
605 case 32:
606 return AMDGPU::sub0;
607 case 64:
608 return AMDGPU::sub0_sub1;
609 case 96:
610 return AMDGPU::sub0_sub1_sub2;
611 case 128:
612 return AMDGPU::sub0_sub1_sub2_sub3;
613 case 256:
614 return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
615 default:
616 if (Size < 32)
617 return AMDGPU::sub0;
618 if (Size > 256)
619 return -1;
620 return sizeToSubRegIndex(PowerOf2Ceil(Size));
621 }
622}
623
// Select G_TRUNC as a (possibly subregister) COPY.  Both operands must be on
// the same register bank; for sources wider than 32 bits the source operand
// is narrowed with a subregister index matching the destination size.
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  // All checks passed: mutate the G_TRUNC into a plain COPY in place.
  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}
672
/// \returns true if a bitmask for \p Size bits will be an inline immediate,
/// i.e. its value read as a signed 32-bit integer lies in [-16, 64].
/// \p Mask is always set to the mask of \p Size trailing ones.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = Size >= 32 ? ~0u : ((1u << Size) - 1u);
  const int AsSigned = static_cast<int>(Mask);
  return AsSigned >= -16 && AsSigned <= 64;
}
679
// Select G_SEXT/G_ZEXT/G_ANYEXT based on the source's register bank:
//   - scc source: S_CSELECT between 0 and -1/1,
//   - vcc source: V_CNDMASK between 0 and -1/1,
//   - VGPR source: V_BFE (or V_AND for cheap zext masks),
//   - SGPR source: S_SEXT/S_BFE/S_AND, widening through a REG_SEQUENCE when
//     the destination is 64-bit.
// NOTE(review): the original G_* instruction is not erased here; presumably
// the caller erases it when this returns true — confirm in select().
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    // Select 0 or -1/1 depending on the condition mask.
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  // An any-extend needs no defined high bits, so a plain copy suffices.
  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    // Otherwise extract the low SrcSize bits with a (signed or unsigned) BFE.
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0)       // Offset
      .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    // Dedicated scalar sign-extension instructions for 8/16 -> 32.
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    // 32-bit destination: prefer a cheap S_AND mask for zext, else S_BFE.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}
814
// Select a G_CONSTANT or G_FCONSTANT into S_MOV_B32 / V_MOV_B32 (one for a
// 32-bit value, a lo/hi pair combined with REG_SEQUENCE for a 64-bit value).
// Returns false if the destination size is not 32 or 64 bits.
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  // Rewrite the operand in place to a plain integer immediate (FP constants
  // are bitcast to their integer representation).
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  // Decide scalar vs. vector mov from the destination's register bank if it
  // has one, otherwise from its already-assigned register class.
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  // Only 32-bit and 64-bit constants are handled here.
  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    // Mutate the G_CONSTANT in place into the target mov.
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  // 64-bit case: materialize the low and high halves with two 32-bit movs
  // and stitch them together with a REG_SEQUENCE.
  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());   // low 32 bits

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());    // high 32 bits

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}
881
882static bool isConstant(const MachineInstr &MI) {
883 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
884}
885
886void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
887 const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
888
889 const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());
890
891 assert(PtrMI);
892
893 if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
894 return;
895
896 GEPInfo GEPInfo(*PtrMI);
897
898 for (unsigned i = 1, e = 3; i < e; ++i) {
899 const MachineOperand &GEPOp = PtrMI->getOperand(i);
900 const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
901 assert(OpDef);
902 if (isConstant(*OpDef)) {
903 // FIXME: Is it possible to have multiple Imm parts? Maybe if we
904 // are lacking other optimizations.
905 assert(GEPInfo.Imm == 0);
906 GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
907 continue;
908 }
909 const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
910 if (OpBank->getID() == AMDGPU::SGPRRegBankID)
911 GEPInfo.SgprParts.push_back(GEPOp.getReg());
912 else
913 GEPInfo.VgprParts.push_back(GEPOp.getReg());
914 }
915
916 AddrInfo.push_back(GEPInfo);
917 getAddrModeInfo(*PtrMI, MRI, AddrInfo);
918}
919
Tom Stellard79b5c382019-02-20 21:02:37 +0000920bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
Tom Stellardca166212017-01-30 21:56:46 +0000921 if (!MI.hasOneMemOperand())
922 return false;
923
924 const MachineMemOperand *MMO = *MI.memoperands_begin();
925 const Value *Ptr = MMO->getValue();
926
927 // UndefValue means this is a load of a kernel input. These are uniform.
928 // Sometimes LDS instructions have constant pointers.
929 // If Ptr is null, then that means this mem operand contains a
930 // PseudoSourceValue like GOT.
931 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
932 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
933 return true;
934
Matt Arsenault923712b2018-02-09 16:57:57 +0000935 if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
936 return true;
937
Tom Stellardca166212017-01-30 21:56:46 +0000938 const Instruction *I = dyn_cast<Instruction>(Ptr);
939 return I && I->getMetadata("amdgpu.uniform");
940}
941
Tom Stellardca166212017-01-30 21:56:46 +0000942bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
943 for (const GEPInfo &GEPInfo : AddrInfo) {
944 if (!GEPInfo.VgprParts.empty())
945 return true;
946 }
947 return false;
948}
949
Tom Stellardca166212017-01-30 21:56:46 +0000950bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
951 MachineBasicBlock *BB = I.getParent();
952 MachineFunction *MF = BB->getParent();
953 MachineRegisterInfo &MRI = MF->getRegInfo();
954 DebugLoc DL = I.getDebugLoc();
955 unsigned DstReg = I.getOperand(0).getReg();
956 unsigned PtrReg = I.getOperand(1).getReg();
957 unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
958 unsigned Opcode;
959
960 SmallVector<GEPInfo, 4> AddrInfo;
961
962 getAddrModeInfo(I, MRI, AddrInfo);
963
Tom Stellardca166212017-01-30 21:56:46 +0000964 switch (LoadSize) {
965 default:
966 llvm_unreachable("Load size not supported\n");
967 case 32:
968 Opcode = AMDGPU::FLAT_LOAD_DWORD;
969 break;
970 case 64:
971 Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
972 break;
973 }
974
975 MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
976 .add(I.getOperand(0))
977 .addReg(PtrReg)
Matt Arsenaultfd023142017-06-12 15:55:58 +0000978 .addImm(0) // offset
979 .addImm(0) // glc
Stanislav Mekhanoshina6322942019-04-30 22:08:23 +0000980 .addImm(0) // slc
981 .addImm(0); // dlc
Tom Stellardca166212017-01-30 21:56:46 +0000982
983 bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
984 I.eraseFromParent();
985 return Ret;
986}
987
// Select a G_BRCOND into a conditional branch. The condition register is
// copied into the matching physical condition register (SCC or VCC) and the
// corresponding S_CBRANCH_* opcode is emitted.
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    // Scalar (uniform) branch: condition lives in SCC.
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know that a VCC producer based on the register bank, that ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    // Condition in neither SCC nor VCC form: not selectable here.
    return false;

  // Give the virtual condition register a concrete class if it lacks one, so
  // the physreg copy below is legal.
  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}
1032
Matt Arsenaultcda82f02019-07-01 15:48:18 +00001033bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
1034 MachineBasicBlock *BB = I.getParent();
1035 MachineFunction *MF = BB->getParent();
1036 MachineRegisterInfo &MRI = MF->getRegInfo();
1037
1038 Register DstReg = I.getOperand(0).getReg();
1039 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
1040 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
1041 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
1042 if (IsVGPR)
1043 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
1044
1045 return RBI.constrainGenericRegister(
1046 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
1047}
1048
// Main entry point for instruction selection: dispatch each generic opcode to
// its manual handler, or to the tablegen-imported selector (selectImpl).
bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  // Non-generic instructions are already selected; only copies still need
  // their register classes constrained.
  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    // These are no-op value moves; select them as plain copies.
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    // Try the manual handler first, then fall back to imported patterns.
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_LOAD:
    // Imported patterns (e.g. SMRD forms) get first crack at loads.
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    // selectG_SZA_EXT emits the replacement but leaves the original
    // instruction in place; erase it on success.
    if (selectG_SZA_EXT(I)) {
      I.eraseFromParent();
      return true;
    }

    return false;
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  }
  return false;
}
Tom Stellard1dc90202018-05-10 20:53:06 +00001111
Tom Stellard26fac0f2018-06-22 02:54:57 +00001112InstructionSelector::ComplexRendererFns
1113AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
1114 return {{
1115 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1116 }};
1117
1118}
1119
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001120std::pair<Register, unsigned>
1121AMDGPUInstructionSelector::selectVOP3ModsImpl(
1122 Register Src, const MachineRegisterInfo &MRI) const {
1123 unsigned Mods = 0;
1124 MachineInstr *MI = MRI.getVRegDef(Src);
1125
1126 if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
1127 Src = MI->getOperand(1).getReg();
1128 Mods |= SISrcMods::NEG;
1129 MI = MRI.getVRegDef(Src);
1130 }
1131
1132 if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
1133 Src = MI->getOperand(1).getReg();
1134 Mods |= SISrcMods::ABS;
1135 }
1136
1137 return std::make_pair(Src, Mods);
1138}
1139
Tom Stellard1dc90202018-05-10 20:53:06 +00001140///
1141/// This will select either an SGPR or VGPR operand and will save us from
1142/// having to write an extra tablegen pattern.
1143InstructionSelector::ComplexRendererFns
1144AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
1145 return {{
1146 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1147 }};
1148}
Tom Stellarddcc95e92018-05-11 05:44:16 +00001149
1150InstructionSelector::ComplexRendererFns
1151AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001152 MachineRegisterInfo &MRI
1153 = Root.getParent()->getParent()->getParent()->getRegInfo();
1154
1155 Register Src;
1156 unsigned Mods;
1157 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1158
Tom Stellarddcc95e92018-05-11 05:44:16 +00001159 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001160 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1161 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
1162 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1163 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
Tom Stellarddcc95e92018-05-11 05:44:16 +00001164 }};
1165}
Tom Stellard9a653572018-06-22 02:34:29 +00001166InstructionSelector::ComplexRendererFns
1167AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
1168 return {{
1169 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
1170 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1171 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
1172 }};
1173}
Tom Stellard46bbbc32018-06-13 22:30:47 +00001174
1175InstructionSelector::ComplexRendererFns
1176AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001177 MachineRegisterInfo &MRI
1178 = Root.getParent()->getParent()->getParent()->getRegInfo();
1179
1180 Register Src;
1181 unsigned Mods;
1182 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1183
Tom Stellard46bbbc32018-06-13 22:30:47 +00001184 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001185 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1186 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
Tom Stellard46bbbc32018-06-13 22:30:47 +00001187 }};
1188}
Tom Stellard79b5c382019-02-20 21:02:37 +00001189
1190InstructionSelector::ComplexRendererFns
1191AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
1192 MachineRegisterInfo &MRI =
1193 Root.getParent()->getParent()->getParent()->getRegInfo();
1194
1195 SmallVector<GEPInfo, 4> AddrInfo;
1196 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
1197
1198 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1199 return None;
1200
1201 const GEPInfo &GEPInfo = AddrInfo[0];
1202
1203 if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
1204 return None;
1205
1206 unsigned PtrReg = GEPInfo.SgprParts[0];
1207 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1208 return {{
1209 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1210 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1211 }};
1212}
1213
1214InstructionSelector::ComplexRendererFns
1215AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
1216 MachineRegisterInfo &MRI =
1217 Root.getParent()->getParent()->getParent()->getRegInfo();
1218
1219 SmallVector<GEPInfo, 4> AddrInfo;
1220 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
1221
1222 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1223 return None;
1224
1225 const GEPInfo &GEPInfo = AddrInfo[0];
1226 unsigned PtrReg = GEPInfo.SgprParts[0];
1227 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1228 if (!isUInt<32>(EncodedImm))
1229 return None;
1230
1231 return {{
1232 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1233 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1234 }};
1235}
1236
1237InstructionSelector::ComplexRendererFns
1238AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
1239 MachineInstr *MI = Root.getParent();
1240 MachineBasicBlock *MBB = MI->getParent();
1241 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1242
1243 SmallVector<GEPInfo, 4> AddrInfo;
1244 getAddrModeInfo(*MI, MRI, AddrInfo);
1245
1246 // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
1247 // then we can select all ptr + 32-bit offsets not just immediate offsets.
1248 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1249 return None;
1250
1251 const GEPInfo &GEPInfo = AddrInfo[0];
1252 if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
1253 return None;
1254
1255 // If we make it this far we have a load with an 32-bit immediate offset.
1256 // It is OK to select this using a sgpr offset, because we have already
1257 // failed trying to select this load into one of the _IMM variants since
1258 // the _IMM Patterns are considered before the _SGPR patterns.
1259 unsigned PtrReg = GEPInfo.SgprParts[0];
1260 unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1261 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
1262 .addImm(GEPInfo.Imm);
1263 return {{
1264 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1265 [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
1266 }};
1267}