//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

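// Return true if Reg is the physical SCC register or a virtual register that
// has been assigned to the SCC register bank (modeled as a 1-bit value in an
// SReg_32_XM0 class register).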
static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
  if (Reg == AMDGPU::SCC)
    return true;

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return false;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC)
    return RC->getID() == AMDGPU::SReg_32_XM0RegClassID &&
           MRI.getType(Reg).getSizeInBits() == 1;

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

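// Lower a generic COPY. Most copies only need their operands constrained to
// register classes, but a copy out of the scc bank into a 64-bit (vcc)
// destination is expanded to a V_CMP_NE_U32 against zero.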
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  // Special case for COPY from the scc register bank.  The scc register bank
  // is modeled using 32-bit sgprs.
  const MachineOperand &Src = I.getOperand(1);
  unsigned SrcReg = Src.getReg();
  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
    unsigned DstReg = I.getOperand(0).getReg();
    unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI);

    // We have a copy from a 32-bit to 64-bit register.  This happens
    // when we are selecting scc->vcc copies.
    if (DstSize == 64) {
      const DebugLoc &DL = I.getDebugLoc();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);
      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }
  }

  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

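// Split a 64-bit operand into the 32-bit half selected by SubIdx: register
// operands are copied into a fresh SGPR_32 via a subregister copy, and
// immediates are split into their low or high 32 bits.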
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

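// Read the immediate value out of a G_CONSTANT-defining instruction.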
static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

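// Select a 64-bit scalar add as an S_ADD_U32/S_ADDC_U32 pair whose halves are
// recombined with a REG_SEQUENCE.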
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
          .add(Lo1)
          .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
          .add(Hi1)
          .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
          .addReg(DstLo)
          .addImm(AMDGPU::sub0)
          .addReg(DstHi)
          .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

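// Select G_EXTRACT of a 32-bit-aligned piece of a register as a subregister
// copy.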
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                              .addDef(I.getOperand(0).getReg())
                              .addReg(I.getOperand(1).getReg())
                              .addReg(I.getOperand(2).getReg())
                              .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

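// Select side-effect-free intrinsic calls. Most cases are handled by the
// TableGen-generated selector; kernarg.segment.ptr is lowered to a copy from
// the preloaded kernarg segment pointer register.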
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
                                          CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  default:
    break;
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);

  case Intrinsic::amdgcn_kernarg_segment_ptr: {
    MachineFunction *MF = I.getParent()->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    const ArgDescriptor *InputPtrReg;
    const TargetRegisterClass *RC;
    const DebugLoc &DL = I.getDebugLoc();

    std::tie(InputPtrReg, RC)
      = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    if (!InputPtrReg)
      report_fatal_error("missing kernarg segment ptr");

    BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
      .add(I.getOperand(0))
      .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
    I.eraseFromParent();
    return true;
  }
  }
  return false;
}

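// Map an integer predicate to the corresponding 32- or 64-bit VALU compare
// opcode.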
static unsigned getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  assert(Size == 32 || Size == 64);
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

static unsigned getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  // FIXME: VI supports 64-bit compares.
  assert(Size == 32);
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  }
}

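// Select G_ICMP either as a scalar S_CMP producing scc, or as a VALU V_CMP
// producing a 64-bit condition register.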
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
  // FIXME: VI supports 64-bit compares.
  assert(Size == 32);

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned Opcode = getS_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  assert(Size == 32 || Size == 64);
  unsigned Opcode = getV_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               AMDGPU::SReg_64RegClass, MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

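// Build an EXP or EXP_DONE export instruction from already-selected operands.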
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
                                          MachineInstr &I,
                                          CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  }
  return false;
}

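// Select G_SELECT as S_CSELECT when the condition lives in scc, otherwise as
// V_CNDMASK_B32.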
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size == 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 32 ? AMDGPU::S_CSELECT_B32 :
                                         AMDGPU::S_CSELECT_B64;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class used to
    // represent the scc bank, so we need to set the register class manually
    // here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  assert(Size == 32);
  // FIXME: Support 64-bit select
  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .add(I.getOperand(3))
        .addImm(0)
        .add(I.getOperand(2))
        .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

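// Select G_STORE. Currently this always emits a FLAT store sized by the value
// being stored.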
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

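// Return the subregister index covering the low Size bits of a register, or
// -1 if there is no suitable index.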
static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

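// Select G_TRUNC as a (possibly subregister) copy within the same register
// bank.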
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

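// Select G_SEXT/G_ZEXT/G_ANYEXT based on the source register bank: scc and
// vcc sources become conditional selects, VGPR sources use V_BFE, and SGPR
// sources use S_SEXT or S_BFE.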
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect
    //
    // TODO: USE V_AND_B32 when the constant mask is an inline immediate for
    // unsigned for smaller code size.
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0)        // Offset
      .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
      .addReg(SrcReg)
      .addImm(SrcSize << 16);
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

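// Select G_CONSTANT/G_FCONSTANT into S_MOV_B32 or V_MOV_B32 moves, splitting
// 64-bit values into two 32-bit moves joined by a REG_SEQUENCE.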
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

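// Walk the G_GEP chain feeding a load/store and record the constant offset
// and the SGPR/VGPR address parts for addressing-mode selection.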
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

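// Return true if the memory access is known to be uniform (scalar), based on
// its pointer value and metadata.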
bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

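// Fallback selection for G_LOAD: emit a FLAT load of the appropriate width.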
bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(0))
          .addReg(PtrReg)
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

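// Top-level entry point: dispatch each generic instruction either to the
// TableGen-generated selector or to one of the manual select* routines above.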
bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    return selectG_ICMP(I);
  case TargetOpcode::G_LOAD:
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    if (selectG_SZA_EXT(I)) {
      I.eraseFromParent();
      return true;
    }

    return false;
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // src_mods
  }};
}

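// Complex pattern matcher for SMRD with an immediate offset: matches a single
// uniform base pointer plus a legally encodable immediate.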
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

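// Complex pattern matcher for SMRD with an SGPR offset: materializes a 32-bit
// constant offset into an SGPR when the immediate forms don't apply.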
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using an sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}