//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI) {}

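// Return a 32-bit operand covering the half of MO selected by SubIdx.
// Register operands are extracted with a subregister COPY into a fresh
// 32-bit SGPR; immediate operands are split into their low or high 32 bits.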
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

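// Select a 64-bit scalar G_ADD as an S_ADD_U32 / S_ADDC_U32 pair and
// recombine the two halves with a REG_SEQUENCE; other sizes are rejected
// for now.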
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
          .add(getSubOperand64(I.getOperand(1), AMDGPU::sub0))
          .add(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
          .add(getSubOperand64(I.getOperand(1), AMDGPU::sub1))
          .add(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
          .addReg(DstLo)
          .addImm(AMDGPU::sub0)
          .addReg(DstHi)
          .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

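// A G_GEP is a pointer plus a byte offset, so it is selected exactly like a
// 64-bit add.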
bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

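// Stores are currently always emitted as FLAT_STORE_DWORD; see the FIXME
// below about choosing the opcode from the address space.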
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  DebugLoc DL = I.getDebugLoc();

  // FIXME: Select store instruction based on address space
  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)
          .addImm(0)
          .addImm(0);

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

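// Materialize constants with S_MOV_B32: directly for 32-bit values, or as
// two 32-bit moves recombined by a REG_SEQUENCE for 64-bit values.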
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (Size == 32) {
    I.setDesc(TII.get(AMDGPU::S_MOV_B32));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  assert(Size == 64);

  DebugLoc DL = I.getDebugLoc();
  unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  const APInt &Imm = I.getOperand(1).getCImm()->getValue();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
          .addReg(LoReg)
          .addImm(AMDGPU::sub0)
          .addReg(HiReg)
          .addImm(AMDGPU::sub1);
  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes.
  I.eraseFromParent();
  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

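// Walk the chain of G_GEPs feeding a load's pointer operand, recording for
// each GEP its constant offset and which register operands live on the SGPR
// vs. VGPR bank.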
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts? Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

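// Conservatively decide whether a memory access is uniform (scalar) by
// inspecting the IR value attached to its single memory operand.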
static bool isInstrUniform(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

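// Widen a base one-dword SMRD opcode to the variant matching LoadSize bits
// (1, 2, 4, 8 or 16 dwords).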
static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {

  if (LoadSize == 32)
    return BaseOpcode;

  switch (BaseOpcode) {
  case AMDGPU::S_LOAD_DWORD_IMM:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_IMM_ci:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_SGPR:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_SGPR;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_SGPR;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_SGPR;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_SGPR;
    }
    break;
  }
  llvm_unreachable("Invalid base smrd opcode or size");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

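// Try to select a uniform load from the constant address space as a scalar
// (SMRD) load. The GEP offset is encoded as an immediate when legal, as a
// 32-bit immediate on Sea Islands, or moved into an SGPR otherwise; with no
// usable GEP the pointer is used directly with a zero offset.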
bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
                                           ArrayRef<GEPInfo> AddrInfo) const {

  if (!I.hasOneMemOperand())
    return false;

  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return false;

  if (!isInstrUniform(I))
    return false;

  if (hasVgprParts(AddrInfo))
    return false;

  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Opcode;
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {

    const GEPInfo &GEPInfo = AddrInfo[0];

    unsigned PtrReg = GEPInfo.SgprParts[0];
    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                               .addReg(PtrReg)
                               .addImm(EncodedImm)
                               .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
        isUInt<32>(EncodedImm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                               .addReg(PtrReg)
                               .addImm(EncodedImm)
                               .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (isUInt<32>(GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
              .addImm(GEPInfo.Imm);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                               .addReg(PtrReg)
                               .addReg(OffsetReg)
                               .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }
  }

  unsigned PtrReg = I.getOperand(1).getReg();
  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                           .addReg(PtrReg)
                           .addImm(0)
                           .addImm(0); // glc
  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
}

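// Select a G_LOAD: prefer the SMRD path when the address is uniform and in
// the constant address space, otherwise fall back to a FLAT load sized by
// the destination register (32 or 64 bits).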
bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  if (selectSMRD(I, AddrInfo)) {
    I.eraseFromParent();
    return true;
  }

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                           .add(I.getOperand(0))
                           .addReg(PtrReg)
                           .addImm(0)
                           .addImm(0)
                           .addImm(0);

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

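// Entry point for instruction selection. Instructions that are already
// target-specific are accepted as-is; generic opcodes are dispatched to the
// handlers above, and anything unhandled fails selection.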
bool AMDGPUInstructionSelector::select(MachineInstr &I) const {

  if (!isPreISelGenericOpcode(I.getOpcode()))
    return true;

  switch (I.getOpcode()) {
  default:
    break;
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_CONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_LOAD:
    return selectG_LOAD(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  }
  return false;
}