//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

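// Return true if \p Reg is known to hold the scalar condition code: either the
// physical SCC register itself, or a 1-bit virtual register constrained to
// SReg_32_XM0 / assigned to the SCC register bank.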
static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
    // context of the register bank has been lost.
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }

    if (!isVCC(SrcReg, MRI)) {
      // TODO: Should probably leave the copy and let copyPhysReg expand it.
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
        return false;

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);

      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
      return false;

    // Don't constrain the source register to a class so the def instruction
    // handles it (unless it's undef).
    //
    // FIXME: This is a hack. When selecting the def, we need to know
    // specifically that the result is VCCRegBank, and not just an SGPR
    // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
    if (Src.isUndef()) {
      const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, MRI);
      if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
        return false;
    }

    return true;
  }

  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}

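// Split a 64-bit source operand into the requested 32-bit half. Register
// operands are copied through a new virtual register of \p SubRC using the
// composed subregister index; immediate operands are split into their low or
// high 32 bits.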
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
      .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &Dst = I.getOperand(0);
  MachineOperand &Src0 = I.getOperand(1);
  MachineOperand &Src1 = I.getOperand(2);
  Register DstReg = Dst.getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    const TargetRegisterClass *RC = TRI.getBoolRC();
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
                                           RC == &AMDGPU::SReg_64RegClass);
    I.setDesc(TII.get(InstOpc));

    // FIXME: Hack to avoid turning the register bank into a register class.
    // The selector for G_ICMP relies on seeing that the register bank of the
    // result is VCC. In wave32, if we constrain the registers to SReg_32 here,
    // it will be ambiguous whether it's a scalar or vector bool.
    if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg()))
      MRI.setRegClass(Src0.getReg(), RC);
    if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg()))
      MRI.setRegClass(Src1.getReg(), RC);

    return RBI.constrainGenericRegister(DstReg, *RC, MRI);
  }

  // TODO: Should this allow an SCC bank result, and produce a copy from SCC
  // for the result?
  if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
    I.setDesc(TII.get(InstOpc));

    const TargetRegisterClass *RC
      = TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (!RC)
      return false;
    return RBI.constrainGenericRegister(DstReg, *RC, MRI) &&
           RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) &&
           RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI);
  }

  return false;
}

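// Select G_ADD/G_SUB. 32-bit operations map directly to a scalar or vector
// add/sub; 64-bit additions are split into a low half plus a carry-consuming
// high half and reassembled with REG_SEQUENCE.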
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                         .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();
  if (SrcSize < 32)
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
    TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg0);
  LLT SrcTy = MRI.getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC =
    TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                        .addDef(I.getOperand(0).getReg())
                        .addReg(I.getOperand(1).getReg())
                        .addReg(I.getOperand(2).getReg())
                        .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(
  MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();
    MachineFunction *MF = BB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
      if (!MRI.getRegClassOrNull(Reg))
        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    }

    return true;
  }
  default:
    return selectImpl(I, CoverageInfo);
  }
}

static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

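// Select G_ICMP. If the result lives in SCC, use a scalar S_CMP and copy SCC
// into the result register; otherwise select a VALU V_CMP producing a
// VCC-bank result.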
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
      .add(I.getOperand(2))
      .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
    .add(I.getOperand(2))
    .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
    .addImm(Tgt)
    .addReg(Reg0)
    .addReg(Reg1)
    .addReg(Reg2)
    .addReg(Reg3)
    .addImm(VM)
    .addImm(Compr)
    .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(),
            TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    if (!MRI.getRegClassOrNull(Reg))
      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    return true;
  }
  default:
    return selectImpl(I, CoverageInfo);
  }
}

bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class that we use
    // to represent it. So we need to set the register class manually here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .add(I.getOperand(3))
        .addImm(0)
        .add(I.getOperand(2))
        .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
  if (PtrSize != 64) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
    .add(I.getOperand(1))
    .add(I.getOperand(0))
    .addImm(0)  // offset
    .addImm(0)  // glc
    .addImm(0)  // slc
    .addImm(0); // dlc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

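// Select G_SEXT/G_ZEXT/G_ANYEXT. The lowering depends on the bank of the
// source: SCC and VCC booleans become conditional selects, VGPR sources use
// V_BFE or a V_AND mask, and SGPR sources use S_SEXT/S_BFE/S_AND.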
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0)        // Offset
      .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
    .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
    .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
        .addReg(LoReg)
        .addImm(AMDGPU::sub0)
        .addReg(HiReg)
        .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

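// Walk the G_GEP chain feeding \p Load and record, for each GEP, its constant
// immediate offset plus which address components live in SGPRs vs. VGPRs.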
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts? Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();
  Register DstReg = I.getOperand(0).getReg();
  Register PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  if (MRI.getType(I.getOperand(1).getReg()).getSizeInBits() == 32) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  switch (LoadSize) {
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Unhandled load size\n");
    return false;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
    .add(I.getOperand(0))
    .addReg(PtrReg)
    .addImm(0)  // offset
    .addImm(0)  // glc
    .addImm(0)  // slc
    .addImm(0); // dlc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

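// Select G_BRCOND. An SCC condition becomes S_CBRANCH_SCC1 on a copy to SCC;
// a VCC condition becomes S_CBRANCH_VCCNZ on a copy to VCC. Other condition
// banks are rejected.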
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know, based on the register bank, that a VCC producer ands
    // the inactive lanes with 0. What if there was a logical operation with
    // vcc producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
    DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
}

bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {
  if (I.isPHI())
    return selectPHI(I);

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectG_AND_OR_XOR(I))
      return true;
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    if (selectG_ADD_SUB(I))
      return true;
    LLVM_FALLTHROUGH;
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_LOAD:
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    if (selectG_SZA_EXT(I)) {
      I.eraseFromParent();
      return true;
    }
1416
1417 return false;
Matt Arsenault64642802019-07-01 15:39:27 +00001418 case TargetOpcode::G_BRCOND:
1419 return selectG_BRCOND(I);
Matt Arsenaultcda82f02019-07-01 15:48:18 +00001420 case TargetOpcode::G_FRAME_INDEX:
1421 return selectG_FRAME_INDEX(I);
Matt Arsenaulted633992019-07-02 14:17:38 +00001422 case TargetOpcode::G_FENCE:
1423 // FIXME: The TableGen importer doesn't handle the imm operands correctly,
1424 // and is checking for G_CONSTANT.
1425 I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
1426 return true;
Tom Stellardca166212017-01-30 21:56:46 +00001427 }
1428 return false;
1429}
Tom Stellard1dc90202018-05-10 20:53:06 +00001430
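// Trivial complex pattern: accept the root operand unchanged and let the
// later operand constraining decide between SGPR and VGPR.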
Tom Stellard26fac0f2018-06-22 02:54:57 +00001431InstructionSelector::ComplexRendererFns
1432AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
1433 return {{
1434 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1435 }};
1436}
1438
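// Fold fneg/fabs producers of a VOP3 source into source modifier bits and
// return the stripped source register together with the accumulated mods.
// For example (illustrative vregs only):
//
//   %a = G_FABS %x
//   %b = G_FNEG %a
//   selectVOP3ModsImpl(%b, MRI) -> { %x, SISrcMods::NEG | SISrcMods::ABS }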
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001439std::pair<Register, unsigned>
1440AMDGPUInstructionSelector::selectVOP3ModsImpl(
1441 Register Src, const MachineRegisterInfo &MRI) const {
1442 unsigned Mods = 0;
1443 MachineInstr *MI = MRI.getVRegDef(Src);
1444
1445 if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
1446 Src = MI->getOperand(1).getReg();
1447 Mods |= SISrcMods::NEG;
1448 MI = MRI.getVRegDef(Src);
1449 }
1450
1451 if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
1452 Src = MI->getOperand(1).getReg();
1453 Mods |= SISrcMods::ABS;
1454 }
1455
1456 return std::make_pair(Src, Mods);
1457}
1458
Tom Stellard1dc90202018-05-10 20:53:06 +00001459///
1460/// This will select either an SGPR or VGPR operand and will save us from
1461/// having to write an extra tablegen pattern.
1462InstructionSelector::ComplexRendererFns
1463AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
1464 return {{
1465 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1466 }};
1467}
Tom Stellarddcc95e92018-05-11 05:44:16 +00001468
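// Match a VOP3 src0 operand: fold fneg/fabs into src0_mods and supply the
// default clamp and omod values of 0.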
1469InstructionSelector::ComplexRendererFns
1470AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001471 MachineRegisterInfo &MRI
1472 = Root.getParent()->getParent()->getParent()->getRegInfo();
1473
1474 Register Src;
1475 unsigned Mods;
1476 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1477
Tom Stellarddcc95e92018-05-11 05:44:16 +00001478 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001479 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1480 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
1481 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1482 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
Tom Stellarddcc95e92018-05-11 05:44:16 +00001483 }};
1484}
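
// Match a VOP3 operand that only carries the output modifiers: the source is
// used unchanged and clamp/omod default to 0.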
Tom Stellard9a653572018-06-22 02:34:29 +00001485InstructionSelector::ComplexRendererFns
1486AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
1487 return {{
1488 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
1489 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1490 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
1491 }};
1492}
Tom Stellard46bbbc32018-06-13 22:30:47 +00001493
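// Like selectVOP3Mods0, but only produces the source and src_mods operands
// (no clamp/omod).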
1494InstructionSelector::ComplexRendererFns
1495AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001496 MachineRegisterInfo &MRI
1497 = Root.getParent()->getParent()->getParent()->getRegInfo();
1498
1499 Register Src;
1500 unsigned Mods;
1501 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1502
Tom Stellard46bbbc32018-06-13 22:30:47 +00001503 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001504 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1505 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
Tom Stellard46bbbc32018-06-13 22:30:47 +00001506 }};
1507}
Tom Stellard79b5c382019-02-20 21:02:37 +00001508
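// Match an SMRD (scalar memory read) address of the form (SGPR base +
// immediate offset) where the offset is legal for the subtarget's encoded
// immediate field; returns the base register and the encoded offset.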
1509InstructionSelector::ComplexRendererFns
1510AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
1511 MachineRegisterInfo &MRI =
1512 Root.getParent()->getParent()->getParent()->getRegInfo();
1513
1514 SmallVector<GEPInfo, 4> AddrInfo;
1515 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
1516
1517 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1518 return None;
1519
1520 const GEPInfo &GEPInfo = AddrInfo[0];
1521
1522 if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
1523 return None;
1524
1525 unsigned PtrReg = GEPInfo.SgprParts[0];
1526 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1527 return {{
1528 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1529 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1530 }};
1531}
1532
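// Variant of selectSmrdImm for the addressing mode that takes a 32-bit
// literal offset; only the range check on the encoded immediate differs.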
1533InstructionSelector::ComplexRendererFns
1534AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
1535 MachineRegisterInfo &MRI =
1536 Root.getParent()->getParent()->getParent()->getRegInfo();
1537
1538 SmallVector<GEPInfo, 4> AddrInfo;
1539 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
1540
1541 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1542 return None;
1543
1544 const GEPInfo &GEPInfo = AddrInfo[0];
1545 unsigned PtrReg = GEPInfo.SgprParts[0];
1546 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1547 if (!isUInt<32>(EncodedImm))
1548 return None;
1549
1550 return {{
1551 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1552 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1553 }};
1554}
1555
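// Match the (SGPR base + SGPR offset) SMRD form by materializing a known
// immediate offset into an SGPR with S_MOV_B32. See the comments below for
// why this only fires after the _IMM forms have been rejected.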
1556InstructionSelector::ComplexRendererFns
1557AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
1558 MachineInstr *MI = Root.getParent();
1559 MachineBasicBlock *MBB = MI->getParent();
1560 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1561
1562 SmallVector<GEPInfo, 4> AddrInfo;
1563 getAddrModeInfo(*MI, MRI, AddrInfo);
1564
1565 // FIXME: We should shrink the GEP if the offset is known to be <= 32 bits,
1566 // then we can select all ptr + 32-bit offsets, not just immediate offsets.
1567 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1568 return None;
1569
1570 const GEPInfo &GEPInfo = AddrInfo[0];
1571 if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
1572 return None;
1573
1574 // If we make it this far we have a load with a 32-bit immediate offset.
1575 // It is OK to select this using an SGPR offset, because we have already
1576 // failed trying to select this load into one of the _IMM variants since
1577 // the _IMM patterns are considered before the _SGPR patterns.
1578 unsigned PtrReg = GEPInfo.SgprParts[0];
1579 unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1580 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
1581 .addImm(GEPInfo.Imm);
1582 return {{
1583 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1584 [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
1585 }};
1586}