blob: 217b3996996121de86dde79629a8cd67c52240d2 [file] [log] [blame]
Tom Stellardca166212017-01-30 21:56:46 +00001//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tom Stellardca166212017-01-30 21:56:46 +00006//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AMDGPU.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUInstructionSelector.h"
15#include "AMDGPUInstrInfo.h"
16#include "AMDGPURegisterBankInfo.h"
17#include "AMDGPURegisterInfo.h"
18#include "AMDGPUSubtarget.h"
Tom Stellard1dc90202018-05-10 20:53:06 +000019#include "AMDGPUTargetMachine.h"
Tom Stellard44b30b42018-05-22 02:03:23 +000020#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Matt Arsenault7161fb02019-07-16 19:22:21 +000021#include "SIMachineFunctionInfo.h"
Matt Arsenault2dd088e2019-09-09 15:39:32 +000022#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
Tom Stellard1dc90202018-05-10 20:53:06 +000023#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
24#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
Matt Arsenault7161fb02019-07-16 19:22:21 +000025#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
Aditya Nandakumar18b3f9d2018-01-17 19:31:33 +000026#include "llvm/CodeGen/GlobalISel/Utils.h"
Tom Stellardca166212017-01-30 21:56:46 +000027#include "llvm/CodeGen/MachineBasicBlock.h"
28#include "llvm/CodeGen/MachineFunction.h"
29#include "llvm/CodeGen/MachineInstr.h"
30#include "llvm/CodeGen/MachineInstrBuilder.h"
31#include "llvm/CodeGen/MachineRegisterInfo.h"
32#include "llvm/IR/Type.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/raw_ostream.h"
35
36#define DEBUG_TYPE "amdgpu-isel"
37
38using namespace llvm;
Matt Arsenault7161fb02019-07-16 19:22:21 +000039using namespace MIPatternMatch;
Tom Stellardca166212017-01-30 21:56:46 +000040
Tom Stellard1dc90202018-05-10 20:53:06 +000041#define GET_GLOBALISEL_IMPL
Tom Stellard5bfbae52018-07-11 20:59:01 +000042#define AMDGPUSubtarget GCNSubtarget
Tom Stellard1dc90202018-05-10 20:53:06 +000043#include "AMDGPUGenGlobalISel.inc"
44#undef GET_GLOBALISEL_IMPL
Tom Stellard5bfbae52018-07-11 20:59:01 +000045#undef AMDGPUSubtarget
Tom Stellard1dc90202018-05-10 20:53:06 +000046
// Constructor: caches the subtarget's instruction and register info plus the
// register bank info, and records the late-structurize-CFG setting from the
// target machine. The trailing TableGen-generated includes initialize the
// selector's predicate and temporary members declared in the header.
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
62
// Returns the selector's name for debug output; reuses DEBUG_TYPE
// ("amdgpu-isel") so -debug-only filtering matches this pass.
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
Tom Stellardca166212017-01-30 21:56:46 +000064
Matt Arsenault2ab25f92019-07-01 16:06:02 +000065static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
Daniel Sanders2bea69b2019-08-01 23:27:28 +000066 if (Register::isPhysicalRegister(Reg))
Matt Arsenault2ab25f92019-07-01 16:06:02 +000067 return Reg == AMDGPU::SCC;
Tom Stellard8b1c53b2019-06-17 16:27:43 +000068
69 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
70 const TargetRegisterClass *RC =
71 RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
Matt Arsenault1daad912019-07-01 15:23:04 +000072 if (RC) {
Matt Arsenaultc8291c92019-07-15 19:50:07 +000073 // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
74 // context of the register bank has been lost.
Matt Arsenault1daad912019-07-01 15:23:04 +000075 if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
76 return false;
77 const LLT Ty = MRI.getType(Reg);
78 return Ty.isValid() && Ty.getSizeInBits() == 1;
79 }
Tom Stellard8b1c53b2019-06-17 16:27:43 +000080
81 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
82 return RB->getID() == AMDGPU::SCCRegBankID;
83}
84
Matt Arsenault2ab25f92019-07-01 16:06:02 +000085bool AMDGPUInstructionSelector::isVCC(Register Reg,
86 const MachineRegisterInfo &MRI) const {
Daniel Sanders2bea69b2019-08-01 23:27:28 +000087 if (Register::isPhysicalRegister(Reg))
Matt Arsenault2ab25f92019-07-01 16:06:02 +000088 return Reg == TRI.getVCC();
Matt Arsenault9f992c22019-07-01 13:22:07 +000089
90 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
91 const TargetRegisterClass *RC =
92 RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
93 if (RC) {
Matt Arsenault18b71332019-07-15 19:44:07 +000094 const LLT Ty = MRI.getType(Reg);
Matt Arsenault2ab25f92019-07-01 16:06:02 +000095 return RC->hasSuperClassEq(TRI.getBoolRC()) &&
Matt Arsenault18b71332019-07-15 19:44:07 +000096 Ty.isValid() && Ty.getSizeInBits() == 1;
Matt Arsenault9f992c22019-07-01 13:22:07 +000097 }
98
99 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
100 return RB->getID() == AMDGPU::VCCRegBankID;
101}
102
// Select a generic COPY. Copies into a VCC-classified destination need
// special lowering (SCC source, non-boolean source materialized via
// V_CMP_NE_U32, or plain VCC-to-VCC); all other copies just have their
// operands constrained to legal register classes.
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, MRI)) {
    // Copy from the physical SCC register: keep the COPY, only constrain the
    // destination.
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }

    if (!isVCC(SrcReg, MRI)) {
      // Source is a plain 32-bit value: materialize the boolean by comparing
      // against zero instead of copying.
      // TODO: Should probably leave the copy and let copyPhysReg expand it.
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
        return false;

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);

      // Give the source a class if nothing else has yet.
      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }

    // VCC-to-VCC copy: constrain the destination only.
    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
      return false;

    // Don't constrain the source register to a class so the def instruction
    // handles it (unless it's undef).
    //
    // FIXME: This is a hack. When selecting the def, we need to know
    // specifically that the result is VCCRegBank, and not just an SGPR
    // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
    if (Src.isUndef()) {
      const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, MRI);
      if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
        return false;
    }

    return true;
  }

  // Generic path: constrain every virtual register operand that has a
  // constrained class available.
  for (const MachineOperand &MO : I.operands()) {
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}
172
// Select G_PHI into a target PHI by resolving a register class for the
// result: use the assigned class if present, otherwise derive one from the
// register bank and LLT type. SCC-bank PHIs are rejected.
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    // No class assigned yet; we need a valid type to pick one from the bank.
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}
210
Tom Stellardca166212017-01-30 21:56:46 +0000211MachineOperand
212AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000213 const TargetRegisterClass &SubRC,
Tom Stellardca166212017-01-30 21:56:46 +0000214 unsigned SubIdx) const {
215
216 MachineInstr *MI = MO.getParent();
217 MachineBasicBlock *BB = MO.getParent()->getParent();
218 MachineFunction *MF = BB->getParent();
219 MachineRegisterInfo &MRI = MF->getRegInfo();
Matt Arsenault0a52e9d2019-07-01 16:34:48 +0000220 Register DstReg = MRI.createVirtualRegister(&SubRC);
Tom Stellardca166212017-01-30 21:56:46 +0000221
222 if (MO.isReg()) {
223 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
Daniel Sanders0c476112019-08-15 19:22:08 +0000224 Register Reg = MO.getReg();
Tom Stellardca166212017-01-30 21:56:46 +0000225 BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
226 .addReg(Reg, 0, ComposedSubIdx);
227
228 return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
229 MO.isKill(), MO.isDead(), MO.isUndef(),
230 MO.isEarlyClobber(), 0, MO.isDebug(),
231 MO.isInternalRead());
232 }
233
234 assert(MO.isImm());
235
236 APInt Imm(64, MO.getImm());
237
238 switch (SubIdx) {
239 default:
240 llvm_unreachable("do not know to split immediate with this sub index.");
241 case AMDGPU::sub0:
242 return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
243 case AMDGPU::sub1:
244 return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
245 }
246}
247
Tom Stellard390a5f42018-07-13 21:05:14 +0000248static int64_t getConstant(const MachineInstr *MI) {
249 return MI->getOperand(1).getCImm()->getSExtValue();
250}
251
Matt Arsenaultc8291c92019-07-15 19:50:07 +0000252static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
253 switch (Opc) {
254 case AMDGPU::G_AND:
255 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
256 case AMDGPU::G_OR:
257 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
258 case AMDGPU::G_XOR:
259 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
260 default:
261 llvm_unreachable("not a bit op");
262 }
263}
264
// Select G_AND/G_OR/G_XOR. VCC-bank results use the boolean register class
// and the wave-sized scalar opcode; SGPR-bank results pick the opcode by the
// destination size. VGPR-bank results are not handled here.
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &Dst = I.getOperand(0);
  MachineOperand &Src0 = I.getOperand(1);
  MachineOperand &Src1 = I.getOperand(2);
  Register DstReg = Dst.getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    const TargetRegisterClass *RC = TRI.getBoolRC();
    // The opcode width follows the boolean class (wave64 -> B64 forms).
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
                                           RC == &AMDGPU::SReg_64RegClass);
    I.setDesc(TII.get(InstOpc));

    // FIXME: Hack to avoid turning the register bank into a register class.
    // The selector for G_ICMP relies on seeing the register bank for the result
    // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will
    // be ambiguous whether it's a scalar or vector bool.
    if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg()))
      MRI.setRegClass(Src0.getReg(), RC);
    if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg()))
      MRI.setRegClass(Src1.getReg(), RC);

    return RBI.constrainGenericRegister(DstReg, *RC, MRI);
  }

  // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
  // the result?
  if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
    I.setDesc(TII.get(InstOpc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  return false;
}
304
// Select G_ADD/G_SUB. 32-bit operations map directly to a scalar or vector
// add/sub (with or without carry depending on subtarget). 64-bit operations
// are split into low/high 32-bit halves chained through carry, then
// reassembled with a REG_SEQUENCE. 64-bit G_SUB is not expected here.
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      // Scalar 32-bit add/sub.
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      // Carryless VALU form: append the required clamp immediate and
      // implicit EXEC use in place.
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    // Carry-producing VALU form; the carry-out def is dead.
    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  // 64-bit path: split both sources into 32-bit halves and add with carry.
  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    // Scalar: S_ADD_U32 sets SCC, S_ADDC_U32 consumes it.
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    // Vector: carry flows through an explicit wave-mask register.
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  // Stitch the two halves back into the 64-bit destination.
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);


  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}
402
Tom Stellard41f32192019-02-28 23:37:48 +0000403bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
404 MachineBasicBlock *BB = I.getParent();
405 MachineFunction *MF = BB->getParent();
406 MachineRegisterInfo &MRI = MF->getRegInfo();
407 assert(I.getOperand(2).getImm() % 32 == 0);
408 unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
409 const DebugLoc &DL = I.getDebugLoc();
410 MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
411 I.getOperand(0).getReg())
412 .addReg(I.getOperand(1).getReg(), 0, SubReg);
413
414 for (const MachineOperand &MO : Copy->operands()) {
415 const TargetRegisterClass *RC =
416 TRI.getConstrainedRegClassForOperand(MO, MRI);
417 if (!RC)
418 continue;
419 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
420 }
421 I.eraseFromParent();
422 return true;
423}
424
// Select G_MERGE_VALUES as a REG_SEQUENCE over the source registers, using
// subregister indices sized by the source width. Sources narrower than 32
// bits are rejected.
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();
  if (SrcSize < 32)
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
    TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
  if (!DstRC)
    return false;

  // One subregister index per SrcSize-byte piece of the destination class.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
    return false;

  MI.eraseFromParent();
  return true;
}
465
// Select G_UNMERGE_VALUES as one subregister COPY per destination, pulling
// each DstSize-bit piece out of the (last-operand) source register.
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg0);
  LLT SrcTy = MRI.getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC =
    TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}
509
// G_GEP (pointer addition) selects exactly like an integer add.
bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}
513
Tom Stellard3f1c6fe2018-06-21 23:38:20 +0000514bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
515 MachineBasicBlock *BB = I.getParent();
516 MachineFunction *MF = BB->getParent();
517 MachineRegisterInfo &MRI = MF->getRegInfo();
518 const MachineOperand &MO = I.getOperand(0);
Matt Arsenaultf8a841b2019-06-24 16:24:03 +0000519
520 // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
521 // regbank check here is to know why getConstrainedRegClassForOperand failed.
522 const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
523 if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
524 (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
525 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
526 return true;
527 }
528
529 return false;
Tom Stellard3f1c6fe2018-06-21 23:38:20 +0000530}
531
// Select G_INSERT as an INSERT_SUBREG. Only 32-bit insert values at 32-bit
// aligned offsets are handled; other shapes are rejected.
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI.getType(Src1Reg);
  // Only a 32-bit inserted value is supported.
  if (Src1Ty.getSizeInBits() != 32)
    return false;

  // The insert offset must land on a 32-bit channel boundary.
  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0)
    return false;

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32);
  const DebugLoc &DL = I.getDebugLoc();

  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                               .addDef(I.getOperand(0).getReg())
                               .addReg(Src0Reg)
                               .addReg(Src1Reg)
                               .addImm(SubReg);

  // Constrain every virtual register operand with a resolvable class.
  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}
571
// Select a side-effect-free G_INTRINSIC. amdgcn_if_break is handled manually;
// everything else goes through the TableGen-generated selectImpl.
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();
    MachineFunction *MF = BB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    // All three registers carry the wave mask; give any class-less register
    // the wave mask class.
    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
      if (!MRI.getRegClassOrNull(Reg))
        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    }

    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}
604
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000605static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
606 if (Size != 32 && Size != 64)
607 return -1;
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000608 switch (P) {
609 default:
610 llvm_unreachable("Unknown condition code!");
611 case CmpInst::ICMP_NE:
612 return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
613 case CmpInst::ICMP_EQ:
614 return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
615 case CmpInst::ICMP_SGT:
616 return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
617 case CmpInst::ICMP_SGE:
618 return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
619 case CmpInst::ICMP_SLT:
620 return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
621 case CmpInst::ICMP_SLE:
622 return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
623 case CmpInst::ICMP_UGT:
624 return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
625 case CmpInst::ICMP_UGE:
626 return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
627 case CmpInst::ICMP_ULT:
628 return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
629 case CmpInst::ICMP_ULE:
630 return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
631 }
632}
633
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000634int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
635 unsigned Size) const {
636 if (Size == 64) {
637 if (!STI.hasScalarCompareEq64())
638 return -1;
639
640 switch (P) {
641 case CmpInst::ICMP_NE:
642 return AMDGPU::S_CMP_LG_U64;
643 case CmpInst::ICMP_EQ:
644 return AMDGPU::S_CMP_EQ_U64;
645 default:
646 return -1;
647 }
648 }
649
650 if (Size != 32)
651 return -1;
652
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000653 switch (P) {
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000654 case CmpInst::ICMP_NE:
655 return AMDGPU::S_CMP_LG_U32;
656 case CmpInst::ICMP_EQ:
657 return AMDGPU::S_CMP_EQ_U32;
658 case CmpInst::ICMP_SGT:
659 return AMDGPU::S_CMP_GT_I32;
660 case CmpInst::ICMP_SGE:
661 return AMDGPU::S_CMP_GE_I32;
662 case CmpInst::ICMP_SLT:
663 return AMDGPU::S_CMP_LT_I32;
664 case CmpInst::ICMP_SLE:
665 return AMDGPU::S_CMP_LE_I32;
666 case CmpInst::ICMP_UGT:
667 return AMDGPU::S_CMP_GT_U32;
668 case CmpInst::ICMP_UGE:
669 return AMDGPU::S_CMP_GE_U32;
670 case CmpInst::ICMP_ULT:
671 return AMDGPU::S_CMP_LT_U32;
672 case CmpInst::ICMP_ULE:
673 return AMDGPU::S_CMP_LE_U32;
Matt Arsenault3b7668a2019-07-01 13:34:26 +0000674 default:
675 llvm_unreachable("Unknown condition code!");
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000676 }
677}
678
// Select G_ICMP. An SCC-classified result uses a scalar compare plus a copy
// of SCC into the destination; otherwise a VALU compare writes the boolean
// result directly.
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  Register CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    // Scalar compare writes SCC; copy it out to the destination register.
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  // Vector compare produces the boolean (wave mask) result directly.
  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
721
// Build an EXP (export) instruction before \p Insert. \p Done selects the
// EXP_DONE opcode; Tgt/VM/Compr/Enabled become immediate operands and
// Reg0-Reg3 the four source registers.
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}
739
// Select a G_INTRINSIC_W_SIDE_EFFECTS instruction.
//
// The export intrinsics and amdgcn_end_cf are selected manually here;
// everything else falls through to the TableGen-imported patterns via
// selectImpl.
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    // Operand layout: (id, tgt, en, src0, src1, src2, src3, done, vm).
    // The immediate-like operands are read off their constant defs.
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    // Compressed export: only two data registers are supplied; the last two
    // export lanes are filled with a single IMPLICIT_DEF register.
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    Register Reg0 = I.getOperand(3).getReg();
    Register Reg1 = I.getOperand(4).getReg();
    Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(),
            TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    // The mask input may still be class-less at this point; pin it to the
    // wave mask class SI_END_CF expects.
    if (!MRI.getRegClassOrNull(Reg))
      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}
798
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000799bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
800 MachineBasicBlock *BB = I.getParent();
801 MachineFunction *MF = BB->getParent();
802 MachineRegisterInfo &MRI = MF->getRegInfo();
803 const DebugLoc &DL = I.getDebugLoc();
804
Daniel Sanders0c476112019-08-15 19:22:08 +0000805 Register DstReg = I.getOperand(0).getReg();
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000806 unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
Matt Arsenaultfdf36722019-07-01 15:42:47 +0000807 assert(Size <= 32 || Size == 64);
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000808 const MachineOperand &CCOp = I.getOperand(1);
Daniel Sanders0c476112019-08-15 19:22:08 +0000809 Register CCReg = CCOp.getReg();
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000810 if (isSCC(CCReg, MRI)) {
Matt Arsenaultfdf36722019-07-01 15:42:47 +0000811 unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
812 AMDGPU::S_CSELECT_B32;
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000813 MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
814 .addReg(CCReg);
815
816 // The generic constrainSelectedInstRegOperands doesn't work for the scc register
817 // bank, because it does not cover the register class that we used to represent
818 // for it. So we need to manually set the register class here.
819 if (!MRI.getRegClassOrNull(CCReg))
820 MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
821 MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
822 .add(I.getOperand(2))
823 .add(I.getOperand(3));
824
825 bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
826 constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
827 I.eraseFromParent();
828 return Ret;
829 }
830
Matt Arsenaultfdf36722019-07-01 15:42:47 +0000831 // Wide VGPR select should have been split in RegBankSelect.
832 if (Size > 32)
833 return false;
834
Tom Stellard8b1c53b2019-06-17 16:27:43 +0000835 MachineInstr *Select =
836 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
837 .addImm(0)
838 .add(I.getOperand(3))
839 .addImm(0)
840 .add(I.getOperand(2))
841 .add(I.getOperand(1));
842
843 bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
844 I.eraseFromParent();
845 return Ret;
846}
847
// Select a generic store: stores may address LDS/GDS, so initialize M0 if
// required, then defer to the TableGen-imported patterns.
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}
852
Matt Arsenaultdbb6c032019-06-24 18:02:18 +0000853static int sizeToSubRegIndex(unsigned Size) {
854 switch (Size) {
855 case 32:
856 return AMDGPU::sub0;
857 case 64:
858 return AMDGPU::sub0_sub1;
859 case 96:
860 return AMDGPU::sub0_sub1_sub2;
861 case 128:
862 return AMDGPU::sub0_sub1_sub2_sub3;
863 case 256:
864 return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
865 default:
866 if (Size < 32)
867 return AMDGPU::sub0;
868 if (Size > 256)
869 return -1;
870 return sizeToSubRegIndex(PowerOf2Ceil(Size));
871 }
872}
873
// Select G_TRUNC by turning it into a COPY, reading the source through a
// sub-register index when the source is wider than 32 bits.
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  // Only handle same-bank truncates; a cross-bank one would need a real copy.
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    // Read only the low DstSize bits of the source via a sub-register.
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  // The truncate is now just a (possibly sub-register) copy.
  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}
922
Matt Arsenault5dafcb92019-07-01 13:22:06 +0000923/// \returns true if a bitmask for \p Size bits will be an inline immediate.
924static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
925 Mask = maskTrailingOnes<unsigned>(Size);
926 int SignedMask = static_cast<int>(Mask);
927 return SignedMask >= -16 && SignedMask <= 64;
928}
929
// Select G_SEXT / G_ZEXT / G_ANYEXT.  The strategy depends on the source's
// register bank: SCC and VCC conditions become conditional selects, VGPR
// sources use V_BFE (or V_AND for cheap zero-extends), and SGPR sources use
// S_SEXT / S_BFE / S_AND.
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    // Select 0 when SCC is set gives the "false" value; -1/1 is the extended
    // "true" value for sext/zext respectively.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    // Per-lane select between 0 and -1/1 driven by the VCC condition.
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  // Any-extend doesn't care about the high bits, so a plain copy suffices.
  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0) // Offset
      .addImm(SrcSize); // Width
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    // Dedicated sign-extension instructions exist for 8- and 16-bit sources.
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      Register ExtReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}
1069
// Select G_CONSTANT / G_FCONSTANT by materializing a 32-bit move, or a pair
// of moves plus a REG_SEQUENCE for 64-bit values.  The scalar vs vector move
// flavor follows the destination's register bank (or class, if no bank is
// assigned yet).
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    // No bank assigned; derive everything from the register class instead.
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    // Mutate the G_CONSTANT in place into the move.
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  // 64-bit case: move each 32-bit half separately, then recombine.
  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  Register LoReg = MRI.createVirtualRegister(RC);
  Register HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}
1136
1137static bool isConstant(const MachineInstr &MI) {
1138 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
1139}
1140
// Walk the chain of G_GEPs feeding \p Load's pointer operand and record, for
// each one, its constant immediate offset and which address components live
// in SGPRs vs VGPRs.  Entries are appended to \p AddrInfo starting with the
// G_GEP closest to the load; recursion then follows the base pointer.
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  // Operand 1 is the base pointer; operand 2 is the offset.
  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (i == 2 && isConstant(*OpDef)) {
      // TODO: Could handle constant base + variable offset, but a combine
      // probably should have commuted it.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}
1174
// Return true if the single memory operand of \p MI is known to be a uniform
// access.
bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  // The 32-bit constant address space is treated as uniform.
  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  // Otherwise fall back to the uniformity metadata attached by the
  // annotate-uniform pass.
  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}
1196
Tom Stellardca166212017-01-30 21:56:46 +00001197bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
1198 for (const GEPInfo &GEPInfo : AddrInfo) {
1199 if (!GEPInfo.VgprParts.empty())
1200 return true;
1201 }
1202 return false;
1203}
1204
// Insert "S_MOV_B32 M0, -1" before \p I when its pointer operand addresses
// LDS/GDS on a subtarget that requires M0 to be initialized for DS
// instructions.  No-op otherwise.
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
  unsigned AS = PtrTy.getAddressSpace();
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
      STI.ldsRequiresM0Init()) {
    // If DS instructions require M0 initialization, insert it before selecting.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addImm(-1);
  }
}
Matt Arsenault35940112019-08-01 00:53:38 +00001219
// Select generic loads and atomic read-modify-write instructions: both may
// address LDS/GDS, so initialize M0 if needed, then use the
// TableGen-imported patterns.
bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}
1224
// Select G_BRCOND.  An SCC condition becomes S_CBRANCH_SCC1; a VCC condition
// becomes S_CBRANCH_VCCNZ.  The condition is first copied into the
// corresponding physical condition register.
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know that a VCC producer based on the register bank, that ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  // Condition register may not have a class yet; give it the one the branch
  // flavor requires.
  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}
1269
Matt Arsenaultcda82f02019-07-01 15:48:18 +00001270bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
1271 MachineBasicBlock *BB = I.getParent();
1272 MachineFunction *MF = BB->getParent();
1273 MachineRegisterInfo &MRI = MF->getRegInfo();
1274
1275 Register DstReg = I.getOperand(0).getReg();
1276 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
1277 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
1278 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
1279 if (IsVGPR)
1280 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
1281
1282 return RBI.constrainGenericRegister(
1283 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
1284}
1285
// Select G_PTR_MASK (clear the low Align bits of a pointer) as an AND with
// ~((1 << Align) - 1), using the scalar or vector ALU depending on the
// destination bank.  For 64-bit pointers only the low half is ANDed and the
// high half is passed through (assumes Align < 32, so the mask's high 32
// bits are all ones — NOTE(review): confirm legalizer guarantees this).
bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const {
  uint64_t Align = I.getOperand(2).getImm();
  const uint64_t Mask = ~((UINT64_C(1) << Align) - 1);

  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
  unsigned MovOpc = IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
  const TargetRegisterClass &RegRC
    = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

  LLT Ty = MRI.getType(DstReg);

  const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB,
                                                                  MRI);
  const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB,
                                                                  MRI);
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI) ||
      !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  // Materialize the mask constant once.
  const DebugLoc &DL = I.getDebugLoc();
  Register ImmReg = MRI.createVirtualRegister(&RegRC);
  BuildMI(*BB, &I, DL, TII.get(MovOpc), ImmReg)
    .addImm(Mask);

  if (Ty.getSizeInBits() == 32) {
    BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
      .addReg(SrcReg)
      .addReg(ImmReg);
    I.eraseFromParent();
    return true;
  }

  // 64-bit: split the source, mask only the low half, then recombine.
  Register HiReg = MRI.createVirtualRegister(&RegRC);
  Register LoReg = MRI.createVirtualRegister(&RegRC);
  Register MaskLo = MRI.createVirtualRegister(&RegRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
    .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
    .addReg(SrcReg, 0, AMDGPU::sub1);

  BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskLo)
    .addReg(LoReg)
    .addReg(ImmReg);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(MaskLo)
    .addImm(AMDGPU::sub0)
    .addReg(HiReg)
    .addImm(AMDGPU::sub1);
  I.eraseFromParent();
  return true;
}
1348
// Top-level selection entry point: dispatch on \p I's opcode to the manual
// per-opcode selectors, falling back to the TableGen-imported patterns
// (selectImpl) where noted.  Returns true on successful selection.
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
  if (I.isPHI())
    return selectPHI(I);

  // Non-generic opcodes are already selected; only copies still need
  // register-class handling.
  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    // Manual selection first, then TableGen patterns.
    if (selectG_AND_OR_XOR(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    // TableGen patterns first, manual selection as the fallback.
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_FADD:
    return selectG_LOAD_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  case TargetOpcode::G_FENCE:
    // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
    // is checking for G_CONSTANT
    I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
    return true;
  case TargetOpcode::G_PTR_MASK:
    return selectG_PTR_MASK(I);
  default:
    return selectImpl(I, *CoverageInfo);
  }
  return false;
}
Tom Stellard1dc90202018-05-10 20:53:06 +00001439
Tom Stellard26fac0f2018-06-22 02:54:57 +00001440InstructionSelector::ComplexRendererFns
1441AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
1442 return {{
1443 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1444 }};
1445
1446}
1447
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001448std::pair<Register, unsigned>
1449AMDGPUInstructionSelector::selectVOP3ModsImpl(
1450 Register Src, const MachineRegisterInfo &MRI) const {
1451 unsigned Mods = 0;
1452 MachineInstr *MI = MRI.getVRegDef(Src);
1453
1454 if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
1455 Src = MI->getOperand(1).getReg();
1456 Mods |= SISrcMods::NEG;
1457 MI = MRI.getVRegDef(Src);
1458 }
1459
1460 if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
1461 Src = MI->getOperand(1).getReg();
1462 Mods |= SISrcMods::ABS;
1463 }
1464
1465 return std::make_pair(Src, Mods);
1466}
1467
Tom Stellard1dc90202018-05-10 20:53:06 +00001468///
1469/// This will select either an SGPR or VGPR operand and will save us from
1470/// having to write an extra tablegen pattern.
1471InstructionSelector::ComplexRendererFns
1472AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
1473 return {{
1474 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1475 }};
1476}
Tom Stellarddcc95e92018-05-11 05:44:16 +00001477
1478InstructionSelector::ComplexRendererFns
1479AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001480 MachineRegisterInfo &MRI
1481 = Root.getParent()->getParent()->getParent()->getRegInfo();
1482
1483 Register Src;
1484 unsigned Mods;
1485 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1486
Tom Stellarddcc95e92018-05-11 05:44:16 +00001487 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001488 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1489 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
1490 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1491 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
Tom Stellarddcc95e92018-05-11 05:44:16 +00001492 }};
1493}
Matt Arsenault77e3e9c2019-09-09 18:29:45 +00001494
1495InstructionSelector::ComplexRendererFns
1496AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const {
1497 MachineRegisterInfo &MRI
1498 = Root.getParent()->getParent()->getParent()->getRegInfo();
1499
1500 Register Src;
1501 unsigned Mods;
1502 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1503
1504 return {{
1505 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1506 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
1507 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1508 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
1509 }};
1510}
1511
Tom Stellard9a653572018-06-22 02:34:29 +00001512InstructionSelector::ComplexRendererFns
1513AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
1514 return {{
1515 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
1516 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1517 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
1518 }};
1519}
Tom Stellard46bbbc32018-06-13 22:30:47 +00001520
1521InstructionSelector::ComplexRendererFns
1522AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001523 MachineRegisterInfo &MRI
1524 = Root.getParent()->getParent()->getParent()->getRegInfo();
1525
1526 Register Src;
1527 unsigned Mods;
1528 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1529
Tom Stellard46bbbc32018-06-13 22:30:47 +00001530 return {{
Matt Arsenault4f64ade2019-07-01 15:18:56 +00001531 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1532 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
Tom Stellard46bbbc32018-06-13 22:30:47 +00001533 }};
1534}
Tom Stellard79b5c382019-02-20 21:02:37 +00001535
1536InstructionSelector::ComplexRendererFns
Matt Arsenaultd6c1f5b2019-09-09 18:29:37 +00001537AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const {
1538 // FIXME: Handle clamp and op_sel
1539 return {{
1540 [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
1541 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src_mods
1542 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // clamp
1543 }};
1544}
1545
1546InstructionSelector::ComplexRendererFns
1547AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
1548 // FIXME: Handle op_sel
1549 return {{
1550 [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
1551 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods
1552 }};
1553}
1554
1555InstructionSelector::ComplexRendererFns
Tom Stellard79b5c382019-02-20 21:02:37 +00001556AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
1557 MachineRegisterInfo &MRI =
1558 Root.getParent()->getParent()->getParent()->getRegInfo();
1559
1560 SmallVector<GEPInfo, 4> AddrInfo;
1561 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
1562
1563 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1564 return None;
1565
1566 const GEPInfo &GEPInfo = AddrInfo[0];
1567
1568 if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
1569 return None;
1570
1571 unsigned PtrReg = GEPInfo.SgprParts[0];
1572 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1573 return {{
1574 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1575 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1576 }};
1577}
1578
1579InstructionSelector::ComplexRendererFns
1580AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
1581 MachineRegisterInfo &MRI =
1582 Root.getParent()->getParent()->getParent()->getRegInfo();
1583
1584 SmallVector<GEPInfo, 4> AddrInfo;
1585 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
1586
1587 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1588 return None;
1589
1590 const GEPInfo &GEPInfo = AddrInfo[0];
1591 unsigned PtrReg = GEPInfo.SgprParts[0];
1592 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1593 if (!isUInt<32>(EncodedImm))
1594 return None;
1595
1596 return {{
1597 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1598 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1599 }};
1600}
1601
/// Match an SMRD address as SGPR base + 32-bit offset held in an SGPR.
/// Materializes the offset with an S_MOV_B32 inserted before the load, then
/// renders (sbase, soffset). Note this emits an instruction as a side effect
/// even though it only returns renderer functions.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  // Reject a zero offset (nothing to put in the SGPR) and anything that
  // doesn't fit in 32 bits.
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with an 32-bit immediate offset.
  // It is OK to select this using a sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}
Matt Arsenault35c96592019-07-16 18:05:29 +00001633
Matt Arsenaultdad1f892019-07-16 18:42:53 +00001634template <bool Signed>
Matt Arsenault35c96592019-07-16 18:05:29 +00001635InstructionSelector::ComplexRendererFns
1636AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
1637 MachineInstr *MI = Root.getParent();
1638 MachineBasicBlock *MBB = MI->getParent();
1639 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1640
1641 InstructionSelector::ComplexRendererFns Default = {{
1642 [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
1643 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset
1644 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
1645 }};
1646
1647 if (!STI.hasFlatInstOffsets())
1648 return Default;
1649
1650 const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
1651 if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
1652 return Default;
1653
1654 Optional<int64_t> Offset =
1655 getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
1656 if (!Offset.hasValue())
1657 return Default;
1658
1659 unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
1660 if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
1661 return Default;
1662
1663 Register BasePtr = OpDef->getOperand(1).getReg();
1664
1665 return {{
1666 [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
1667 [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
1668 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
1669 }};
1670}
1671
1672InstructionSelector::ComplexRendererFns
1673AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
1674 return selectFlatOffsetImpl<false>(Root);
1675}
1676
1677InstructionSelector::ComplexRendererFns
1678AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
1679 return selectFlatOffsetImpl<true>(Root);
1680}
Matt Arsenault7161fb02019-07-16 19:22:21 +00001681
Matt Arsenault7161fb02019-07-16 19:22:21 +00001682static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1683 auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1684 return PSV && PSV->isStack();
1685}
1686
/// Select (rsrc, vaddr, soffset, offset) for a MUBUF scratch access using the
/// offen (VGPR address) form. Two paths:
///  1. A pure constant address: the high bits (~4095) are materialized into a
///     VGPR with V_MOV_B32 (emitted here, before the renderers run), and the
///     low 12 bits go in the immediate offset field.
///  2. Otherwise, try to fold a frame index and/or a legal constant offset
///     from a base+offset expression into vaddr/offset.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) {
    Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // TODO: Should this be inside the render function? The iterator seems to
    // move.
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
      .addImm(Offset & ~4095);

    return {{[=](MachineInstrBuilder &MIB) { // rsrc
               MIB.addReg(Info->getScratchRSrcReg());
             },
             [=](MachineInstrBuilder &MIB) { // vaddr
               MIB.addReg(HighBits);
             },
             [=](MachineInstrBuilder &MIB) { // soffset
               // Stack accesses use the SP register; everything else uses
               // the scratch wave offset.
               const MachineMemOperand *MMO = *MI->memoperands_begin();
               const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

               Register SOffsetReg = isStackPtrRelative(PtrInfo)
                                         ? Info->getStackPtrOffsetReg()
                                         : Info->getScratchWaveOffsetReg();
               MIB.addReg(SOffsetReg);
             },
             [=](MachineInstrBuilder &MIB) { // offset
               // Low 12 bits of the constant address.
               MIB.addImm(Offset & 4095);
             }}};
  }

  assert(Offset == 0);

  // Try to fold a frame index directly into the MUBUF vaddr field, and any
  // offsets.
  Optional<int> FI;
  Register VAddr = Root.getReg();
  if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) {
    if (isBaseWithConstantOffset(Root, MRI)) {
      const MachineOperand &LHS = RootDef->getOperand(1);
      const MachineOperand &RHS = RootDef->getOperand(2);
      const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
      const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
      if (LHSDef && RHSDef) {
        int64_t PossibleOffset =
            RHSDef->getOperand(1).getCImm()->getSExtValue();
        // The offset must encode legally, and on range-checked subtargets
        // the base must be provably non-negative.
        if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
            (!STI.privateMemoryResourceIsRangeChecked() ||
             KnownBits->signBitIsZero(LHS.getReg()))) {
          if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
            FI = LHSDef->getOperand(1).getIndex();
          else
            VAddr = LHS.getReg();
          Offset = PossibleOffset;
        }
      }
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  // TODO: Should split large offsets that don't fit like above.
  // TODO: Don't use scratch wave offset just because the offset didn't fit.
  Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
                                   : Info->getScratchWaveOffsetReg();

  return {{[=](MachineInstrBuilder &MIB) { // rsrc
             MIB.addReg(Info->getScratchRSrcReg());
           },
           [=](MachineInstrBuilder &MIB) { // vaddr
             if (FI.hasValue())
               MIB.addFrameIndex(FI.getValue());
             else
               MIB.addReg(VAddr);
           },
           [=](MachineInstrBuilder &MIB) { // soffset
             MIB.addReg(SOffset);
           },
           [=](MachineInstrBuilder &MIB) { // offset
             MIB.addImm(Offset);
           }}};
}
1778
Matt Arsenault35940112019-08-01 00:53:38 +00001779bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
1780 const MachineOperand &Base,
1781 int64_t Offset,
1782 unsigned OffsetBits) const {
1783 if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
1784 (OffsetBits == 8 && !isUInt<8>(Offset)))
1785 return false;
1786
1787 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
1788 return true;
1789
1790 // On Southern Islands instruction with a negative base value and an offset
1791 // don't seem to work.
Matt Arsenault2dd088e2019-09-09 15:39:32 +00001792 return KnownBits->signBitIsZero(Base.getReg());
Matt Arsenault35940112019-08-01 00:53:38 +00001793}
1794
Matt Arsenault7161fb02019-07-16 19:22:21 +00001795InstructionSelector::ComplexRendererFns
1796AMDGPUInstructionSelector::selectMUBUFScratchOffset(
1797 MachineOperand &Root) const {
1798 MachineInstr *MI = Root.getParent();
1799 MachineBasicBlock *MBB = MI->getParent();
1800 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1801
1802 int64_t Offset = 0;
1803 if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) ||
1804 !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
1805 return {};
1806
1807 const MachineFunction *MF = MBB->getParent();
1808 const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
1809 const MachineMemOperand *MMO = *MI->memoperands_begin();
1810 const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
1811
1812 Register SOffsetReg = isStackPtrRelative(PtrInfo)
1813 ? Info->getStackPtrOffsetReg()
1814 : Info->getScratchWaveOffsetReg();
1815 return {{
1816 [=](MachineInstrBuilder &MIB) {
1817 MIB.addReg(Info->getScratchRSrcReg());
1818 }, // rsrc
1819 [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
1820 [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
1821 }};
1822}
Matt Arsenault35940112019-08-01 00:53:38 +00001823
/// Select (addr, offset) for a single-address DS instruction. Folds
/// (add base, const) into the 16-bit offset field when legal; otherwise
/// renders the root address with a zero offset.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef) {
    // No visible def: use the address as-is with a zero offset.
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
      }};
  }

  int64_t ConstAddr = 0;
  if (isBaseWithConstantOffset(Root, MRI)) {
    const MachineOperand &LHS = RootDef->getOperand(1);
    const MachineOperand &RHS = RootDef->getOperand(2);
    const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t PossibleOffset =
        RHSDef->getOperand(1).getCImm()->getSExtValue();
      if (isDSOffsetLegal(MRI, LHS, PossibleOffset, 16)) {
        // (add n0, c0)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
          }};
      }
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO(review): unimplemented stub — a (sub base, const) address is not
    // folded and falls through to the default rendering below.


  } else if (mi_match(Root.getReg(), MRI, m_ICst(ConstAddr))) {
    // TODO(review): unimplemented stub — a pure constant address is not
    // special-cased and falls through to the default rendering below.

  }

  // Default: whole address in the register operand, zero offset.
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
    }};
}