//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

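// Return true if \p Reg is the physical scc register or a virtual register
// assigned to the scc register bank, which models 1-bit values in 32-bit
// SGPRs.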
static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
  if (Reg == AMDGPU::SCC)
    return true;

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return false;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC)
    return RC->getID() == AMDGPU::SReg_32_XM0RegClassID &&
           MRI.getType(Reg).getSizeInBits() == 1;

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  // Special case for COPY from the scc register bank.  The scc register bank
  // is modeled using 32-bit sgprs.
  const MachineOperand &Src = I.getOperand(1);
  unsigned SrcReg = Src.getReg();
  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
    unsigned DstReg = I.getOperand(0).getReg();
    unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI);

    // We have a copy from a 32-bit to 64-bit register.  This happens
    // when we are selecting scc->vcc copies.
    if (DstSize == 64) {
      const DebugLoc &DL = I.getDebugLoc();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64),
              I.getOperand(0).getReg())
        .addImm(0)
        .addReg(SrcReg);
      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }
  }

  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

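// Split a 64-bit operand into the requested 32-bit half (sub0 or sub1): a
// register operand is extracted through a sub-register COPY into a new SGPR,
// an immediate operand is split arithmetically.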
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

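// A 64-bit G_ADD is selected as an S_ADD_U32/S_ADDC_U32 pair on the 32-bit
// halves, with the results recombined by a REG_SEQUENCE.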
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
          .add(Lo1)
          .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
          .add(Hi1)
          .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
          .addReg(DstLo)
          .addImm(AMDGPU::sub0)
          .addReg(DstHi)
          .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                              .addDef(I.getOperand(0).getReg())
                              .addReg(I.getOperand(1).getReg())
                              .addReg(I.getOperand(2).getReg())
                              .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
                                          CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  default:
    break;
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);

  case Intrinsic::amdgcn_kernarg_segment_ptr: {
    MachineFunction *MF = I.getParent()->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    const ArgDescriptor *InputPtrReg;
    const TargetRegisterClass *RC;
    const DebugLoc &DL = I.getDebugLoc();

    std::tie(InputPtrReg, RC)
      = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    if (!InputPtrReg)
      report_fatal_error("missing kernarg segment ptr");

    BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
      .add(I.getOperand(0))
      .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
    I.eraseFromParent();
    return true;
  }
  }
  return false;
}

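// Map an integer predicate to the VALU compare opcode for a 32- or 64-bit
// source.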
static unsigned getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  assert(Size == 32 || Size == 64);
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

static unsigned getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  // FIXME: VI supports 64-bit compares.
  assert(Size == 32);
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  }
}

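// A compare whose result lands on the scc bank becomes an S_CMP plus a copy
// from SCC; otherwise a V_CMP writing a 64-bit SGPR pair (vcc-style) is used.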
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
  // FIXME: VI supports 64-bit compares.
  assert(Size == 32);

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned Opcode =
        getS_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  assert(Size == 32 || Size == 64);
  unsigned Opcode =
      getV_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               AMDGPU::SReg_64RegClass, MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I,
    CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size == 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 32 ? AMDGPU::S_CSELECT_B32 :
                                         AMDGPU::S_CSELECT_B64;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it doesn't cover the register class we use to
    // model that bank, so set the register class manually here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  assert(Size == 32);
  // FIXME: Support 64-bit select
  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .addImm(0)
          .add(I.getOperand(3))
          .addImm(0)
          .add(I.getOperand(2))
          .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

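// Map a value size in bits to the sub-register index covering its low bits.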
static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

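// G_TRUNC is selected as a COPY; for sources wider than 32 bits only the
// sub-register covering the destination size is read.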
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

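// Select G_SEXT/G_ZEXT/G_ANYEXT according to the source register bank: scc and
// vcc sources become conditional selects of 0/1 or 0/-1, VGPR sources use
// V_BFE or an AND mask, and SGPR sources use S_SEXT/S_BFE/S_AND.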
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0)        // Offset
      .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

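// Walk the chain of G_GEPs feeding a load or store and record, for each GEP,
// its constant offset and which address components live in SGPRs vs. VGPRs.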
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(0))
          .addReg(PtrReg)
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    return selectG_ICMP(I);
  case TargetOpcode::G_LOAD:
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    if (selectG_SZA_EXT(I)) {
      I.eraseFromParent();
      return true;
    }

    return false;
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // src_mods
  }};
}

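// Match (SGPR base + constant offset) addressing for scalar memory loads where
// the offset fits the subtarget's SMRD/SMEM immediate encoding.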
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

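// Match SGPR-offset scalar memory addressing: the 32-bit constant offset is
// materialized into an SGPR with S_MOV_B32 and returned as the offset
// register operand.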
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using an sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}