//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

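// Return true if \p Reg is either the physical SCC register or a virtual
// register known to hold a scalar condition (an s1 value constrained to
// SReg_32_XM0 or assigned to the SCC register bank).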
static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
    // context of the register bank has been lost.
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }

    if (!isVCC(SrcReg, MRI)) {
      // TODO: Should probably leave the copy and let copyPhysReg expand it.
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
        return false;

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);

      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
      return false;

    // Don't constrain the source register to a class so the def instruction
    // handles it (unless it's undef).
    //
    // FIXME: This is a hack. When selecting the def, we need to know
    // specifically that the result is VCCRegBank, and not just an SGPR
    // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
    if (Src.isUndef()) {
      const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, MRI);
      if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
        return false;
    }

    return true;
  }

  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
      .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

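// Map a generic G_AND/G_OR/G_XOR opcode to the corresponding 32-bit or 64-bit
// scalar instruction.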
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &Dst = I.getOperand(0);
  MachineOperand &Src0 = I.getOperand(1);
  MachineOperand &Src1 = I.getOperand(2);
  Register DstReg = Dst.getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    const TargetRegisterClass *RC = TRI.getBoolRC();
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
                                           RC == &AMDGPU::SReg_64RegClass);
    I.setDesc(TII.get(InstOpc));

    // FIXME: Hack to avoid turning the register bank into a register class.
    // The selector for G_ICMP relies on seeing that the register bank for the
    // result is VCC. In wave32, if we constrain the registers to SReg_32 here,
    // it will be ambiguous whether it's a scalar or vector bool.
    if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg()))
      MRI.setRegClass(Src0.getReg(), RC);
    if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg()))
      MRI.setRegClass(Src1.getReg(), RC);

    return RBI.constrainGenericRegister(DstReg, *RC, MRI);
  }

  // TODO: Should this allow an SCC bank result, and produce a copy from SCC
  // for the result?
  if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
    I.setDesc(TII.get(InstOpc));

    const TargetRegisterClass *RC
      = TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (!RC)
      return false;
    return RBI.constrainGenericRegister(DstReg, *RC, MRI) &&
           RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) &&
           RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI);
  }

  return false;
}

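// Select G_ADD/G_SUB. A 32-bit operation maps to a single scalar or vector
// add/sub; a 64-bit add is split into a low half that defines a carry and a
// high half that consumes it, then recombined with a REG_SEQUENCE.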
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                         .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();
  if (SrcSize < 32)
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
    TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg0);
  LLT SrcTy = MRI.getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC =
    TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                        .addDef(I.getOperand(0).getReg())
                        .addReg(I.getOperand(1).getReg())
                        .addReg(I.getOperand(2).getReg())
                        .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(
  MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();
    MachineFunction *MF = BB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
      if (!MRI.getRegClassOrNull(Reg))
        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    }

    return true;
  }
  default:
    return selectImpl(I, CoverageInfo);
  }
}

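// Return the VALU compare opcode for the given integer predicate and operand
// size, or -1 if there is no direct equivalent.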
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                           .add(I.getOperand(2))
                           .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
                         .add(I.getOperand(2))
                         .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(),
            TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    if (!MRI.getRegClassOrNull(Reg))
      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    return true;
  }
  default:
    return selectImpl(I, CoverageInfo);
  }
}

bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
                              .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class that we use
    // to represent it. So we need to manually set the register class here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .add(I.getOperand(3))
        .addImm(0)
        .add(I.getOperand(2))
        .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
  if (PtrSize != 64) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)               // src0_modifiers
        .addImm(0)               // src0
        .addImm(0)               // src1_modifiers
        .addImm(Signed ? -1 : 1) // src1
        .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
          .addImm(Mask)
          .addReg(SrcReg);
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
        .addReg(SrcReg)
        .addImm(0)        // Offset
        .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

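// Materialize a G_CONSTANT/G_FCONSTANT. 32-bit values become a single
// S_MOV_B32/V_MOV_B32; 64-bit values are built from two 32-bit moves joined
// by a REG_SEQUENCE.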
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
    .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
    .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
        .addReg(LoReg)
        .addImm(AMDGPU::sub0)
        .addReg(HiReg)
        .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  // TODO: Can/should we insert m0 initialization here for DS instructions and
  // call the normal selector?
  return false;
}

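// Select a conditional branch: copy the condition into SCC or VCC and emit
// the matching S_CBRANCH_SCC1 or S_CBRANCH_VCCNZ.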
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know, based on the register bank, that a VCC producer ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
    DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
}

Daniel Sandersf76f3152017-11-16 00:46:35 +00001308bool AMDGPUInstructionSelector::select(MachineInstr &I,
1309 CodeGenCoverage &CoverageInfo) const {
Matt Arsenaulte1006252019-07-01 16:32:47 +00001310 if (I.isPHI())
1311 return selectPHI(I);
Tom Stellardca166212017-01-30 21:56:46 +00001312
Tom Stellard7712ee82018-06-22 00:44:29 +00001313 if (!isPreISelGenericOpcode(I.getOpcode())) {
1314 if (I.isCopy())
1315 return selectCOPY(I);
Tom Stellardca166212017-01-30 21:56:46 +00001316 return true;
Tom Stellard7712ee82018-06-22 00:44:29 +00001317 }
Tom Stellardca166212017-01-30 21:56:46 +00001318
1319 switch (I.getOpcode()) {
Matt Arsenaultc8291c92019-07-15 19:50:07 +00001320 case TargetOpcode::G_AND:
1321 case TargetOpcode::G_OR:
1322 case TargetOpcode::G_XOR:
1323 if (selectG_AND_OR_XOR(I))
1324 return true;
1325 return selectImpl(I, CoverageInfo);
Tom Stellard9e9dd302019-07-01 16:09:33 +00001326 case TargetOpcode::G_ADD:
Matt Arsenaulte6d10f92019-07-09 14:05:11 +00001327 case TargetOpcode::G_SUB:
1328 if (selectG_ADD_SUB(I))
Tom Stellard9e9dd302019-07-01 16:09:33 +00001329 return true;
1330 LLVM_FALLTHROUGH;
Tom Stellardca166212017-01-30 21:56:46 +00001331 default:
Tom Stellard1dc90202018-05-10 20:53:06 +00001332 return selectImpl(I, CoverageInfo);
Tom Stellard7c650782018-10-05 04:34:09 +00001333 case TargetOpcode::G_INTTOPTR:
Tom Stellard1e0edad2018-05-10 21:20:10 +00001334 case TargetOpcode::G_BITCAST:
1335 return selectCOPY(I);
Tom Stellardca166212017-01-30 21:56:46 +00001336 case TargetOpcode::G_CONSTANT:
Tom Stellarde182b282018-05-15 17:57:09 +00001337 case TargetOpcode::G_FCONSTANT:
Tom Stellardca166212017-01-30 21:56:46 +00001338 return selectG_CONSTANT(I);
Tom Stellard41f32192019-02-28 23:37:48 +00001339 case TargetOpcode::G_EXTRACT:
1340 return selectG_EXTRACT(I);
Matt Arsenault9b7ffc42019-07-09 14:02:20 +00001341 case TargetOpcode::G_MERGE_VALUES:
Matt Arsenaulta65913e2019-07-15 17:26:43 +00001342 case TargetOpcode::G_BUILD_VECTOR:
Matt Arsenault9b7ffc42019-07-09 14:02:20 +00001343 case TargetOpcode::G_CONCAT_VECTORS:
1344 return selectG_MERGE_VALUES(I);
Matt Arsenault872f38b2019-07-09 14:02:26 +00001345 case TargetOpcode::G_UNMERGE_VALUES:
1346 return selectG_UNMERGE_VALUES(I);
Tom Stellardca166212017-01-30 21:56:46 +00001347 case TargetOpcode::G_GEP:
1348 return selectG_GEP(I);
Tom Stellard3f1c6fe2018-06-21 23:38:20 +00001349 case TargetOpcode::G_IMPLICIT_DEF:
1350 return selectG_IMPLICIT_DEF(I);
Tom Stellard33634d1b2019-03-01 00:50:26 +00001351 case TargetOpcode::G_INSERT:
1352 return selectG_INSERT(I);
Tom Stellarda9284732018-06-14 19:26:37 +00001353 case TargetOpcode::G_INTRINSIC:
1354 return selectG_INTRINSIC(I, CoverageInfo);
Tom Stellard390a5f42018-07-13 21:05:14 +00001355 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
1356 return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
Tom Stellard8b1c53b2019-06-17 16:27:43 +00001357 case TargetOpcode::G_ICMP:
Matt Arsenault3b7668a2019-07-01 13:34:26 +00001358 if (selectG_ICMP(I))
1359 return true;
1360 return selectImpl(I, CoverageInfo);
Tom Stellardca166212017-01-30 21:56:46 +00001361 case TargetOpcode::G_LOAD:
Matt Arsenault35c96592019-07-16 18:05:29 +00001362 return selectImpl(I, CoverageInfo);
Tom Stellard8b1c53b2019-06-17 16:27:43 +00001363 case TargetOpcode::G_SELECT:
1364 return selectG_SELECT(I);
Tom Stellardca166212017-01-30 21:56:46 +00001365 case TargetOpcode::G_STORE:
1366 return selectG_STORE(I);
Matt Arsenaultdbb6c032019-06-24 18:02:18 +00001367 case TargetOpcode::G_TRUNC:
1368 return selectG_TRUNC(I);
Matt Arsenaultd7ffa2a2019-06-25 13:18:11 +00001369 case TargetOpcode::G_SEXT:
1370 case TargetOpcode::G_ZEXT:
1371 case TargetOpcode::G_ANYEXT:
1372 if (selectG_SZA_EXT(I)) {
1373 I.eraseFromParent();
1374 return true;
1375 }
1376
1377 return false;
Matt Arsenault64642802019-07-01 15:39:27 +00001378 case TargetOpcode::G_BRCOND:
1379 return selectG_BRCOND(I);
Matt Arsenaultcda82f02019-07-01 15:48:18 +00001380 case TargetOpcode::G_FRAME_INDEX:
1381 return selectG_FRAME_INDEX(I);
Matt Arsenaulted633992019-07-02 14:17:38 +00001382 case TargetOpcode::G_FENCE:
1383 // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
1384 // is checking for G_CONSTANT.
1385 I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
1386 return true;
Tom Stellardca166212017-01-30 21:56:46 +00001387 }
1388 return false;
1389}
Tom Stellard1dc90202018-05-10 20:53:06 +00001390
Tom Stellard26fac0f2018-06-22 02:54:57 +00001391InstructionSelector::ComplexRendererFns
1392AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
1393 return {{
1394 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1395 }};
1397}
1398
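// Peel G_FNEG/G_FABS off a VOP3 source and accumulate the corresponding
// SISrcMods bits, returning the stripped source register and modifier mask.
// For example (sketch): with %2 = G_FABS %1 and %3 = G_FNEG %2, passing %3
// here yields {%1, NEG | ABS}.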
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001399std::pair<Register, unsigned>
1400AMDGPUInstructionSelector::selectVOP3ModsImpl(
1401 Register Src, const MachineRegisterInfo &MRI) const {
1402 unsigned Mods = 0;
1403 MachineInstr *MI = MRI.getVRegDef(Src);
1404
1405 if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
1406 Src = MI->getOperand(1).getReg();
1407 Mods |= SISrcMods::NEG;
1408 MI = MRI.getVRegDef(Src);
1409 }
1410
1411 if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
1412 Src = MI->getOperand(1).getReg();
1413 Mods |= SISrcMods::ABS;
1414 }
1415
1416 return std::make_pair(Src, Mods);
1417}
1418
Tom Stellard1dc90202018-05-10 20:53:06 +00001419///
1420/// This will select either an SGPR or VGPR operand and will save us from
1421/// having to write an extra tablegen pattern.
1422InstructionSelector::ComplexRendererFns
1423AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
1424 return {{
1425 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1426 }};
1427}
Tom Stellarddcc95e92018-05-11 05:44:16 +00001428
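// Render a full VOP3 src0 operand: the (modifier-stripped) source and its
// src0_mods from selectVOP3ModsImpl, plus default clamp and omod of 0.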
1429InstructionSelector::ComplexRendererFns
1430AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001431 MachineRegisterInfo &MRI
1432 = Root.getParent()->getParent()->getParent()->getRegInfo();
1433
1434 Register Src;
1435 unsigned Mods;
1436 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1437
Tom Stellarddcc95e92018-05-11 05:44:16 +00001438 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001439 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1440 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
1441 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1442 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
Tom Stellarddcc95e92018-05-11 05:44:16 +00001443 }};
1444}

Tom Stellard9a653572018-06-22 02:34:29 +00001445InstructionSelector::ComplexRendererFns
1446AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
1447 return {{
1448 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
1449 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1450 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
1451 }};
1452}
Tom Stellard46bbbc32018-06-13 22:30:47 +00001453
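// Like selectVOP3Mods0, but renders only the source and its modifiers, with
// no clamp/omod operands.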
1454InstructionSelector::ComplexRendererFns
1455AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001456 MachineRegisterInfo &MRI
1457 = Root.getParent()->getParent()->getParent()->getRegInfo();
1458
1459 Register Src;
1460 unsigned Mods;
1461 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1462
Tom Stellard46bbbc32018-06-13 22:30:47 +00001463 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001464 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1465 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
Tom Stellard46bbbc32018-06-13 22:30:47 +00001466 }};
1467}
Tom Stellard79b5c382019-02-20 21:02:37 +00001468
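// Match the SMRD "SGPR base + immediate offset" addressing mode by inspecting
// the G_GEP feeding this memory access. This succeeds only if the constant
// offset is a legal SMRD immediate for the subtarget, and it renders the
// offset in its target-encoded form (the encoding is subtarget dependent).
// Rough sketch: a load through %2 = G_GEP %base, <G_CONSTANT 16> can be
// selected as an S_LOAD-style instruction taking %base and the encoding of 16.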
1469InstructionSelector::ComplexRendererFns
1470AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
1471 MachineRegisterInfo &MRI =
1472 Root.getParent()->getParent()->getParent()->getRegInfo();
1473
1474 SmallVector<GEPInfo, 4> AddrInfo;
1475 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
1476
1477 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1478 return None;
1479
1480 const GEPInfo &GEPInfo = AddrInfo[0];
1481
1482 if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
1483 return None;
1484
1485 unsigned PtrReg = GEPInfo.SgprParts[0];
1486 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1487 return {{
1488 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1489 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1490 }};
1491}
1492
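// Variant of selectSmrdImm for the SMRD forms that take a full 32-bit literal
// offset; it bails out if the encoded offset does not fit in 32 bits.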
1493InstructionSelector::ComplexRendererFns
1494AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
1495 MachineRegisterInfo &MRI =
1496 Root.getParent()->getParent()->getParent()->getRegInfo();
1497
1498 SmallVector<GEPInfo, 4> AddrInfo;
1499 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
1500
1501 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1502 return None;
1503
1504 const GEPInfo &GEPInfo = AddrInfo[0];
1505 unsigned PtrReg = GEPInfo.SgprParts[0];
1506 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1507 if (!isUInt<32>(EncodedImm))
1508 return None;
1509
1510 return {{
1511 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1512 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1513 }};
1514}
1515
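// Fallback SMRD addressing mode: when the offset cannot be selected as an
// immediate, materialize it into a fresh SReg_32_XM0 register with S_MOV_B32
// and use the _SGPR form of the load instead.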
1516InstructionSelector::ComplexRendererFns
1517AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
1518 MachineInstr *MI = Root.getParent();
1519 MachineBasicBlock *MBB = MI->getParent();
1520 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1521
1522 SmallVector<GEPInfo, 4> AddrInfo;
1523 getAddrModeInfo(*MI, MRI, AddrInfo);
1524
1525 // FIXME: We should shrink the GEP if the offset is known to fit in 32 bits;
1526 // then we can select all ptr + 32-bit offsets, not just immediate offsets.
1527 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1528 return None;
1529
1530 const GEPInfo &GEPInfo = AddrInfo[0];
1531 if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
1532 return None;
1533
1534 // If we make it this far we have a load with a 32-bit immediate offset.
1535 // It is OK to select this using an SGPR offset, because we have already
1536 // failed trying to select this load into one of the _IMM variants since
1537 // the _IMM patterns are considered before the _SGPR patterns.
1538 unsigned PtrReg = GEPInfo.SgprParts[0];
1539 unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1540 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
1541 .addImm(GEPInfo.Imm);
1542 return {{
1543 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1544 [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
1545 }};
1546}
Matt Arsenault35c96592019-07-16 18:05:29 +00001547
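// Try to fold a constant G_GEP offset into the immediate offset field of a
// FLAT memory instruction: this requires that the subtarget supports FLAT
// instruction offsets and that the constant is legal for the access's address
// space; otherwise the default {address, offset 0, slc 0} operands are used.
// Sketch: a load through %2 = G_GEP %ptr, <G_CONSTANT 8> could render as
// {%ptr, 8, 0}. The Signed parameter selects the signed-offset legality check
// and is used by selectFlatOffsetSigned below.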
1548template <bool Signed>
1549InstructionSelector::ComplexRendererFns
1550AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
1551 MachineInstr *MI = Root.getParent();
1552 MachineBasicBlock *MBB = MI->getParent();
1553 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1554
1555 InstructionSelector::ComplexRendererFns Default = {{
1556 [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
1557 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset
1558 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
1559 }};
1560
1561 if (!STI.hasFlatInstOffsets())
1562 return Default;
1563
1564 const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
1565 if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
1566 return Default;
1567
1568 Optional<int64_t> Offset =
1569 getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
1570 if (!Offset.hasValue())
1571 return Default;
1572
1573 unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
1574 if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
1575 return Default;
1576
1577 Register BasePtr = OpDef->getOperand(1).getReg();
1578
1579 return {{
1580 [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
1581 [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
1582 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
1583 }};
1584}
1585
1586InstructionSelector::ComplexRendererFns
1587AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
1588 return selectFlatOffsetImpl<false>(Root);
1589}
1590
1591InstructionSelector::ComplexRendererFns
1592AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
1593 return selectFlatOffsetImpl<true>(Root);
1594}