//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "GCNHazardRecognizer.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

// Must be at least 4 to be able to branch over minimum unconditional branch
// code. This is only for making it possible to write reasonably small tests for
// long branches.
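// Typical use is from tests, e.g. "llc ... -amdgpu-s-branch-bits=4" to force
// long-branch expansion with small blocks (the invocation here is an
// illustrative assumption, not taken from this file).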
static cl::opt<unsigned>
BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
                 cl::desc("Restrict range of branch instructions (DEBUG)"));

SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
  : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

static unsigned getNumOperandsNoGlue(SDNode *Node) {
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  return N;
}

static SDValue findChainOperand(SDNode *Load) {
  SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
  return LastOp;
}

/// \brief Returns true if both nodes have the same value for the given
///        operand \p OpName, or if both nodes do not have this operand.
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
  unsigned Opc0 = N0->getMachineOpcode();
  unsigned Opc1 = N1->getMachineOpcode();

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)
    return true;

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
    return false;

  // getNamedOperandIdx returns the index for the MachineInstr's operands,
  // which includes the result as the first operand. We are indexing into the
  // MachineSDNode's operands, so we need to skip the result operand to get
  // the real index.
  --Op0Idx;
  --Op1Idx;

  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}

bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
                                                    AliasAnalysis *AA) const {
  // TODO: The generic check fails for VALU instructions that should be
  // rematerializable due to implicit reads of exec. We really want all of the
  // generic logic for this except for that check.
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                          int64_t &Offset0,
                                          int64_t &Offset1) const {
  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
    return false;

  unsigned Opc0 = Load0->getMachineOpcode();
  unsigned Opc1 = Load1->getMachineOpcode();

  // Make sure both are actually loads.
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  if (isDS(Opc0) && isDS(Opc1)) {

    // FIXME: Handle this case:
    if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(1) != Load1->getOperand(1))
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    // Skip read2 / write2 variants for simplicity.
    // TODO: We should report true if the used offsets are adjacent (excluding
    // the st64 versions).
    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
      return false;

    Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
    return true;
  }

  if (isSMRD(Opc0) && isSMRD(Opc1)) {
    // Skip time and cache invalidation instructions.
    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 ||
        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1)
      return false;

    assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    const ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(1));
    const ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(1));

    if (!Load0Offset || !Load1Offset)
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    Offset0 = Load0Offset->getZExtValue();
    Offset1 = Load1Offset->getZExtValue();
    return true;
  }

  // MUBUF and MTBUF can access the same addresses.
  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {

    // MUBUF and MTBUF have vaddr at different indices.
    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
        findChainOperand(Load0) != findChainOperand(Load1) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
      return false;

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;

    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract one from the index.
    --OffIdx0;
    --OffIdx1;

    SDValue Off0 = Load0->getOperand(OffIdx0);
    SDValue Off1 = Load1->getOperand(OffIdx1);

    // The offset might be a FrameIndexSDNode.
    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
      return false;

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
    return true;
  }

  return false;
}

static bool isStride64(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
                                        int64_t &Offset,
                                        const TargetRegisterInfo *TRI) const {
  unsigned Opc = LdSt.getOpcode();

  if (isDS(LdSt)) {
    const MachineOperand *OffsetImm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset);
    if (OffsetImm) {
      // Normal, single offset LDS instruction.
      const MachineOperand *AddrReg =
          getNamedOperand(LdSt, AMDGPU::OpName::addr);

      BaseReg = AddrReg->getReg();
      Offset = OffsetImm->getImm();
      return true;
    }

    // The 2 offset instructions use offset0 and offset1 instead. We can treat
    // these as a load with a single offset if the 2 offsets are consecutive. We
    // will use this for some partially aligned loads.
    const MachineOperand *Offset0Imm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset0);
    const MachineOperand *Offset1Imm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset1);

    uint8_t Offset0 = Offset0Imm->getImm();
    uint8_t Offset1 = Offset1Imm->getImm();

    if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
      // Each of these offsets is in element sized units, so we need to convert
      // to bytes of the individual reads.

      unsigned EltSize;
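      // For a read2, the destination register holds both elements, so each
      // element is half the destination width in bytes (hence bits / 16);
      // for a write2, the data0 operand is a single element (bits / 8).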
      if (LdSt.mayLoad())
        EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16;
      else {
        assert(LdSt.mayStore());
        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8;
      }

      if (isStride64(Opc))
        EltSize *= 64;

      const MachineOperand *AddrReg =
          getNamedOperand(LdSt, AMDGPU::OpName::addr);
      BaseReg = AddrReg->getReg();
      Offset = EltSize * Offset0;
      return true;
    }

    return false;
  }

  if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
    const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
    if (SOffset && SOffset->isReg())
      return false;

    const MachineOperand *AddrReg =
        getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
    if (!AddrReg)
      return false;

    const MachineOperand *OffsetImm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset);
    BaseReg = AddrReg->getReg();
    Offset = OffsetImm->getImm();

    if (SOffset) // soffset can be an inline immediate.
      Offset += SOffset->getImm();

    return true;
  }

  if (isSMRD(LdSt)) {
    const MachineOperand *OffsetImm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset);
    if (!OffsetImm)
      return false;

    const MachineOperand *SBaseReg =
        getNamedOperand(LdSt, AMDGPU::OpName::sbase);
    BaseReg = SBaseReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  if (isFLAT(LdSt)) {
    const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
    if (VAddr) {
      // Can't analyze 2 offsets.
      if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
        return false;

      BaseReg = VAddr->getReg();
    } else {
      // scratch instructions have either vaddr or saddr.
      BaseReg = getNamedOperand(LdSt, AMDGPU::OpName::saddr)->getReg();
    }

    Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
    return true;
  }

  return false;
}

bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
                                      MachineInstr &SecondLdSt,
                                      unsigned NumLoads) const {
  const MachineOperand *FirstDst = nullptr;
  const MachineOperand *SecondDst = nullptr;

  if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
      (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
      (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
    FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
    SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
  } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
    FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst);
    SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst);
  } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) {
    FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
    SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
  }

  if (!FirstDst || !SecondDst)
    return false;

  // Try to limit clustering based on the total number of bytes loaded
  // rather than the number of instructions. This is done to help reduce
  // register pressure. The method used is somewhat inexact, though,
  // because it assumes that all loads in the cluster will load the
  // same number of bytes as FirstLdSt.

  // The unit of this value is bytes.
  // FIXME: This needs finer tuning.
  unsigned LoadClusterThreshold = 16;
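  // With the current 16 byte threshold this allows, for example, clustering
  // up to four 32-bit loads, but only a single 128-bit load.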

  const MachineRegisterInfo &MRI =
      FirstLdSt.getParent()->getParent()->getRegInfo();
  const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg());

  return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
}

static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              const DebugLoc &DL, unsigned DestReg,
                              unsigned SrcReg, bool KillSrc) {
  MachineFunction *MF = MBB.getParent();
  DiagnosticInfoUnsupported IllegalCopy(*MF->getFunction(),
                                        "illegal SGPR to VGPR copy",
                                        DL, DS_Error);
  LLVMContext &C = MF->getFunction()->getContext();
  C.diagnose(IllegalCopy);

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
}

void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              const DebugLoc &DL, unsigned DestReg,
                              unsigned SrcReg, bool KillSrc) const {
  const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);

  if (RC == &AMDGPU::VGPR_32RegClass) {
    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
      BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
        .addImm(-1)
        .addImm(0);
      return;
    }

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
      return;
    }

    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
          .addReg(SrcReg, getKillRegState(KillSrc));
      } else {
        // FIXME: Hack until VReg_1 removed.
        assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
        BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
          .addImm(0)
          .addReg(SrcReg, getKillRegState(KillSrc));
      }

      return;
    }

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
      return;
    }

    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AMDGPU::SCC) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
      .addReg(SrcReg, getKillRegState(KillSrc))
      .addImm(0);
    return;
  }

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isSGPRClass(RC)) {
    if (RI.getRegSizeInBits(*RC) > 32) {
      Opcode = AMDGPU::S_MOV_B64;
      EltSize = 8;
    } else {
      Opcode = AMDGPU::S_MOV_B32;
      EltSize = 4;
    }

    if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
      return;
    }
  }

  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
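  // When DestReg and SrcReg overlap, pick a sub-register copy order that reads
  // each source sub-register before an earlier copy clobbers it.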
  bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    unsigned SubIdx;
    if (Forward)
      SubIdx = SubIndices[Idx];
    else
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx));

    if (Idx == 0)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);

    bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
    Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
  }
}

int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
  int NewOpc;

  // Try to map original to commuted opcode
  NewOpc = AMDGPU::getCommuteRev(Opcode);
  if (NewOpc != -1)
    // Check if the commuted (REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  // Try to map commuted to original opcode
  NewOpc = AMDGPU::getCommuteOrig(Opcode);
  if (NewOpc != -1)
    // Check if the original (non-REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  return Opcode;
}

void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       const DebugLoc &DL, unsigned DestReg,
                                       int64_t Value) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg);
  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
      .addImm(Value);
    return;
  }

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
      .addImm(Value);
    return;
  }

  if (RegClass == &AMDGPU::VGPR_32RegClass) {
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
      .addImm(Value);
    return;
  }
  if (RegClass == &AMDGPU::VReg_64RegClass) {
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg)
      .addImm(Value);
    return;
  }

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isSGPRClass(RegClass)) {
    if (RI.getRegSizeInBits(*RegClass) > 32) {
      Opcode = AMDGPU::S_MOV_B64;
      EltSize = 8;
    } else {
      Opcode = AMDGPU::S_MOV_B32;
      EltSize = 4;
    }
  }

  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize);
  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    int64_t IdxValue = Idx == 0 ? Value : 0;

    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, Idx));
    Builder.addImm(IdxValue);
  }
}

const TargetRegisterClass *
SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const {
  return &AMDGPU::VGPR_32RegClass;
}

void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I,
                                     const DebugLoc &DL, unsigned DstReg,
                                     ArrayRef<MachineOperand> Cond,
                                     unsigned TrueReg,
                                     unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addReg(FalseReg)
      .addReg(TrueReg)
      .add(Cond[0]);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
        .addImm(-1)
        .addImm(0);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addReg(FalseReg)
        .addReg(TrueReg)
        .addReg(SReg);
      break;
    }
    case SIInstrInfo::SCC_FALSE: {
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
        .addImm(0)
        .addImm(-1);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addReg(FalseReg)
        .addReg(TrueReg)
        .addReg(SReg);
      break;
    }
    case SIInstrInfo::VCCNZ: {
      MachineOperand RegOp = Cond[1];
      RegOp.setImplicit(false);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addReg(FalseReg)
        .addReg(TrueReg)
        .add(RegOp);
      break;
    }
    case SIInstrInfo::VCCZ: {
      MachineOperand RegOp = Cond[1];
      RegOp.setImplicit(false);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addReg(TrueReg)
        .addReg(FalseReg)
        .add(RegOp);
      break;
    }
    case SIInstrInfo::EXECNZ: {
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
        .addImm(0);
      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
        .addImm(-1)
        .addImm(0);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addReg(FalseReg)
        .addReg(TrueReg)
        .addReg(SReg);
      break;
    }
    case SIInstrInfo::EXECZ: {
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
        .addImm(0);
      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
        .addImm(0)
        .addImm(-1);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addReg(FalseReg)
        .addReg(TrueReg)
        .addReg(SReg);
      llvm_unreachable("Unhandled branch predicate EXECZ");
      break;
    }
    default:
      llvm_unreachable("invalid branch predicate");
    }
  } else {
    llvm_unreachable("Can only handle Cond size 1 or 2");
  }
}

unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL,
                               unsigned SrcReg, int Value) const {
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
    .addImm(Value)
    .addReg(SrcReg);

  return Reg;
}

unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL,
                               unsigned SrcReg, int Value) const {
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
    .addImm(Value)
    .addReg(SrcReg);

  return Reg;
}

unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {

  if (RI.getRegSizeInBits(*DstRC) == 32) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;
  }
  return AMDGPU::COPY;
}

static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_S32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_S64_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_S128_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_S256_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_S512_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
}

static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_V32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_V64_SAVE;
  case 12:
    return AMDGPU::SI_SPILL_V96_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_V128_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_V256_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_V512_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
}

void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);

  unsigned Size = FrameInfo.getObjectSize(FrameIndex);
  unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
  MachinePointerInfo PtrInfo
    = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
  MachineMemOperand *MMO
    = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                               Size, Align);
  unsigned SpillSize = TRI->getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {
    MFI->setHasSpilledSGPRs();

    // We are only allowed to create one new instruction when spilling
    // registers, so we need to use a pseudo instruction for spilling SGPRs.
    const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize));

    // The SGPR spill/restore instructions only work on numbered SGPRs, so we
    // need to make sure we are using the correct register class.
    if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) {
      MachineRegisterInfo &MRI = MF->getRegInfo();
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
    }

    // Add the scratch resource and frame offset registers as implicit uses
    // because we may end up needing them, and need to ensure that the reserved
    // registers are correctly handled.
    MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
      .addReg(SrcReg, getKillRegState(isKill)) // data
      .addFrameIndex(FrameIndex)               // addr
      .addMemOperand(MMO)
      .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);

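    // Stack ID 1 marks this frame index as an SGPR spill slot, keeping it
    // separate from ordinary scratch stack objects.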
    FrameInfo.setStackID(FrameIndex, 1);
    if (ST.hasScalarStores()) {
      // m0 is used for offset to scalar stores if used to spill.
      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
    }

    return;
  }

  if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
                  " spill register");
    BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
      .addReg(SrcReg);

    return;
  }

  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

  unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
  MFI->setHasSpilledVGPRs();
  BuildMI(MBB, MI, DL, get(Opcode))
    .addReg(SrcReg, getKillRegState(isKill)) // data
    .addFrameIndex(FrameIndex)               // addr
    .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
    .addReg(MFI->getFrameOffsetReg())        // scratch_offset
    .addImm(0)                               // offset
    .addMemOperand(MMO);
}

static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_S32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_S64_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_S128_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_S256_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_S512_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
}

static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_V32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_V64_RESTORE;
  case 12:
    return AMDGPU::SI_SPILL_V96_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_V128_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_V256_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_V512_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
}

void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned DestReg, int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
  unsigned Size = FrameInfo.getObjectSize(FrameIndex);
  unsigned SpillSize = TRI->getSpillSize(*RC);

  MachinePointerInfo PtrInfo
    = MachinePointerInfo::getFixedStack(*MF, FrameIndex);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
    PtrInfo, MachineMemOperand::MOLoad, Size, Align);

  if (RI.isSGPRClass(RC)) {
    // FIXME: Maybe this should not include a memoperand because it will be
    // lowered to non-memory instructions.
    const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
    if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) {
      MachineRegisterInfo &MRI = MF->getRegInfo();
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
    }

    FrameInfo.setStackID(FrameIndex, 1);
    MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
      .addFrameIndex(FrameIndex) // addr
      .addMemOperand(MMO)
      .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);

    if (ST.hasScalarStores()) {
      // m0 is used for offset to scalar stores if used to spill.
      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
    }

    return;
  }

  if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
                  " restore register");
    BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);

    return;
  }

  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

  unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
  BuildMI(MBB, MI, DL, get(Opcode), DestReg)
    .addFrameIndex(FrameIndex)        // vaddr
    .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
    .addReg(MFI->getFrameOffsetReg()) // scratch_offset
    .addImm(0)                        // offset
    .addMemOperand(MMO);
}

/// \param FrameOffset Offset in bytes of the FrameIndex being spilled.
unsigned SIInstrInfo::calculateLDSSpillAddress(
    MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg,
    unsigned FrameOffset, unsigned Size) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
  unsigned WavefrontSize = ST.getWavefrontSize();

  unsigned TIDReg = MFI->getTIDReg();
  if (!MFI->hasCalculatedTID()) {
    MachineBasicBlock &Entry = MBB.getParent()->front();
    MachineBasicBlock::iterator Insert = Entry.front();
    DebugLoc DL = Insert->getDebugLoc();

    TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
                                   *MF);
    if (TIDReg == AMDGPU::NoRegister)
      return TIDReg;

    if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) &&
        WorkGroupSize > WavefrontSize) {

      unsigned TIDIGXReg
        = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
      unsigned TIDIGYReg
        = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
      unsigned TIDIGZReg
        = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
      unsigned InputPtrReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
      for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
        if (!Entry.isLiveIn(Reg))
          Entry.addLiveIn(Reg);
      }

      RS->enterBasicBlock(Entry);
      // FIXME: Can we scavenge an SReg_64 and access the subregs?
      unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Z);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Y);

      // NGROUPS.X * NGROUPS.Y
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
        .addReg(STmp1)
        .addReg(STmp0);
      // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
        .addReg(STmp1)
        .addReg(TIDIGXReg);
      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
        .addReg(STmp0)
        .addReg(TIDIGYReg)
        .addReg(TIDReg);
      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
        .addReg(TIDReg)
        .addReg(TIDIGZReg);
    } else {
      // Get the wave id
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addImm(0);

      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addReg(TIDReg);
    }

    BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
            TIDReg)
      .addImm(2)
      .addReg(TIDReg);
    MFI->setTIDReg(TIDReg);
  }

  // Add FrameIndex to LDS offset
  unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
    .addImm(LDSOffset)
    .addReg(TIDReg);

  return TmpReg;
}

void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   int Count) const {
  DebugLoc DL = MBB.findDebugLoc(MI);
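  // S_NOP inserts (imm + 1) wait states and encodes at most 8 of them, so emit
  // full 8-state nops until the remainder fits into a single S_NOP.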
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
      .addImm(Arg);
  }
}

void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MI) const {
  insertWaitStates(MBB, MI, 1);
}

void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
  auto MF = MBB.getParent();
  SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator)
      BuildMI(MBB, MBB.end(), DebugLoc(),
              get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG));
  }
}

unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default: return 1; // FIXME: Do wait states equal cycles?

  case AMDGPU::S_NOP:
    return MI.getOperand(0).getImm() + 1;
  }
}

bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MBB.findDebugLoc(MI);
  switch (MI.getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
  case AMDGPU::S_MOV_B64_term: {
    // This is only a terminator to get the correct spill code placement during
    // register allocation.
    MI.setDesc(get(AMDGPU::S_MOV_B64));
    break;
  }
  case AMDGPU::S_XOR_B64_term: {
    // This is only a terminator to get the correct spill code placement during
    // register allocation.
    MI.setDesc(get(AMDGPU::S_XOR_B64));
    break;
  }
  case AMDGPU::S_ANDN2_B64_term: {
    // This is only a terminator to get the correct spill code placement during
    // register allocation.
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
    break;
  }
  case AMDGPU::V_MOV_B64_PSEUDO: {
    unsigned Dst = MI.getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    const MachineOperand &SrcOp = MI.getOperand(1);
    // FIXME: Will this work for 64-bit floating point immediates?
    assert(!SrcOp.isFPImm());
    if (SrcOp.isImm()) {
      APInt Imm(64, SrcOp.getImm());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addImm(Imm.getLoBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit | RegState::Define);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addImm(Imm.getHiBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit | RegState::Define);
    } else {
      assert(SrcOp.isReg());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
        .addReg(Dst, RegState::Implicit | RegState::Define);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
        .addReg(Dst, RegState::Implicit | RegState::Define);
    }
    MI.eraseFromParent();
    break;
  }
  case AMDGPU::V_MOVRELD_B32_V1:
  case AMDGPU::V_MOVRELD_B32_V2:
  case AMDGPU::V_MOVRELD_B32_V4:
  case AMDGPU::V_MOVRELD_B32_V8:
  case AMDGPU::V_MOVRELD_B32_V16: {
    const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
    unsigned VecReg = MI.getOperand(0).getReg();
    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

    MachineInstr *MovRel =
        BuildMI(MBB, MI, DL, MovRelDesc)
            .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
            .add(MI.getOperand(2))
            .addReg(VecReg, RegState::ImplicitDefine)
            .addReg(VecReg,
                    RegState::Implicit | (IsUndef ? RegState::Undef : 0));

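    // The whole vector register is read and only partially rewritten, so tie
    // the trailing implicit def of VecReg to its implicit use.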
    const int ImpDefIdx =
        MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
    const int ImpUseIdx = ImpDefIdx + 1;
    MovRel->tieOperands(ImpDefIdx, ImpUseIdx);

    MI.eraseFromParent();
    break;
  }
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    MachineFunction &MF = *MBB.getParent();
    unsigned Reg = MI.getOperand(0).getReg();
    unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    // Create a bundle so these instructions won't be re-ordered by the
    // post-RA scheduler.
    MIBundleBuilder Bundler(MBB, MI);
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));

    // Add 32-bit offset from this instruction to the start of the
    // constant data.
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
                       .addReg(RegLo)
                       .add(MI.getOperand(1)));

    MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
                                  .addReg(RegHi);
    if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE)
      MIB.addImm(0);
    else
      MIB.add(MI.getOperand(2));

    Bundler.append(MIB);
    llvm::finalizeBundle(MBB, Bundler.begin());

    MI.eraseFromParent();
    break;
  }
  case AMDGPU::EXIT_WWM: {
    // This only gets its own opcode so that SIFixWWMLiveness can tell when WWM
    // is exited.
    MI.setDesc(get(AMDGPU::S_MOV_B64));
    break;
  }
  }
  return true;
}

bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
                                      MachineOperand &Src0,
                                      unsigned Src0OpName,
                                      MachineOperand &Src1,
                                      unsigned Src1OpName) const {
  MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName);
  if (!Src0Mods)
    return false;

  MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName);
  assert(Src1Mods &&
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);
  return true;
}

static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
                                             MachineOperand &RegOp,
                                             MachineOperand &NonRegOp) {
  unsigned Reg = RegOp.getReg();
  unsigned SubReg = RegOp.getSubReg();
  bool IsKill = RegOp.isKill();
  bool IsDead = RegOp.isDead();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
    RegOp.ChangeToImmediate(NonRegOp.getImm());
  else if (NonRegOp.isFI())
    RegOp.ChangeToFrameIndex(NonRegOp.getIndex());
  else
    return nullptr;

  NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug);
  NonRegOp.setSubReg(SubReg);

  return &MI;
}

MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                                  unsigned Src0Idx,
                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  int CommutedOpcode = commuteOpcode(Opc);
  if (CommutedOpcode == -1)
    return nullptr;

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
           static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
           static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  MachineOperand &Src1 = MI.getOperand(Src1Idx);

  MachineInstr *CommutedMI = nullptr;
  if (Src0.isReg() && Src1.isReg()) {
    if (isOperandLegal(MI, Src1Idx, &Src0)) {
      // Be sure to copy the source modifiers to the right place.
      CommutedMI
        = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
    }

  } else if (Src0.isReg() && !Src1.isReg()) {
    // src0 should always be able to support any operand type, so no need to
    // check operand legality.
    CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
  } else if (!Src0.isReg() && Src1.isReg()) {
    if (isOperandLegal(MI, Src1Idx, &Src0))
      CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
  } else {
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001248 // FIXME: Found two non-register operands to commute. This does happen.
1249 return nullptr;
Tom Stellard82166022013-11-13 23:36:37 +00001250 }
Christian Konig3c145802013-03-27 09:12:59 +00001251
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001252
1253 if (CommutedMI) {
1254 swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
1255 Src1, AMDGPU::OpName::src1_modifiers);
1256
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001257 CommutedMI->setDesc(get(CommutedOpcode));
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001258 }
Christian Konig3c145802013-03-27 09:12:59 +00001259
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001260 return CommutedMI;
Christian Konig76edd4f2013-02-26 17:52:29 +00001261}
1262
Matt Arsenault92befe72014-09-26 17:54:54 +00001263// This needs to be implemented because the source modifiers may be inserted
1264// between the true commutable operands, and the base
1265// TargetInstrInfo::commuteInstruction uses it.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001266bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
Andrew Kaylor16c4da02015-09-28 20:33:22 +00001267 unsigned &SrcOpIdx1) const {
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001268 if (!MI.isCommutable())
Matt Arsenault92befe72014-09-26 17:54:54 +00001269 return false;
1270
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001271 unsigned Opc = MI.getOpcode();
Matt Arsenault92befe72014-09-26 17:54:54 +00001272 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
1273 if (Src0Idx == -1)
1274 return false;
1275
Matt Arsenault92befe72014-09-26 17:54:54 +00001276 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
1277 if (Src1Idx == -1)
1278 return false;
1279
Andrew Kaylor16c4da02015-09-28 20:33:22 +00001280 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
Matt Arsenault92befe72014-09-26 17:54:54 +00001281}
1282
Matt Arsenault6bc43d82016-10-06 16:20:41 +00001283bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
1284 int64_t BrOffset) const {
1285 // BranchRelaxation should never have to check s_setpc_b64 because its dest
1286 // block is unanalyzable.
1287 assert(BranchOp != AMDGPU::S_SETPC_B64);
1288
1289 // Convert to dwords.
1290 BrOffset /= 4;
1291
1292 // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is
1293 // from the next instruction.
1294 BrOffset -= 1;
1295
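  // With the default 16-bit immediate field this accepts offsets of roughly
  // +/-128 KiB from the branch.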
1296 return isIntN(BranchOffsetBits, BrOffset);
1297}
1298
1299MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
1300 const MachineInstr &MI) const {
1301 if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {
1302 // The target of an indirect branch cannot be determined here, but such a
1303 // branch can always reach its destination, so there's no need to analyze it.
1304 return nullptr;
1305 }
1306
1307 return MI.getOperand(0).getMBB();
1308}
1309
1310unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
1311 MachineBasicBlock &DestBB,
1312 const DebugLoc &DL,
1313 int64_t BrOffset,
1314 RegScavenger *RS) const {
1315 assert(RS && "RegScavenger required for long branching");
1316 assert(MBB.empty() &&
1317 "new block should be inserted for expanding unconditional branch");
1318 assert(MBB.pred_size() == 1);
1319
1320 MachineFunction *MF = MBB.getParent();
1321 MachineRegisterInfo &MRI = MF->getRegInfo();
1322
1323 // FIXME: Virtual register workaround for RegScavenger not working with empty
1324 // blocks.
1325 unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
1326
1327 auto I = MBB.end();
1328
1329 // We need to compute the offset relative to the instruction immediately after
1330 // s_getpc_b64. Insert pc arithmetic code before last terminator.
1331 MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg);
1332
1333 // TODO: Handle > 32-bit block address.
1334 if (BrOffset >= 0) {
1335 BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
1336 .addReg(PCReg, RegState::Define, AMDGPU::sub0)
1337 .addReg(PCReg, 0, AMDGPU::sub0)
1338 .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_FORWARD);
1339 BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
1340 .addReg(PCReg, RegState::Define, AMDGPU::sub1)
1341 .addReg(PCReg, 0, AMDGPU::sub1)
1342 .addImm(0);
1343 } else {
1344 // Backwards branch.
1345 BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
1346 .addReg(PCReg, RegState::Define, AMDGPU::sub0)
1347 .addReg(PCReg, 0, AMDGPU::sub0)
1348 .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_BACKWARD);
1349 BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
1350 .addReg(PCReg, RegState::Define, AMDGPU::sub1)
1351 .addReg(PCReg, 0, AMDGPU::sub1)
1352 .addImm(0);
1353 }
1354
1355 // Insert the indirect branch after the other terminator.
1356 BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
1357 .addReg(PCReg);
1358
1359 // FIXME: If spilling is necessary, this will fail because this scavenger has
1360 // no emergency stack slots. It is non-trivial to spill in this situation,
1361 // because the restore code needs to be specially placed after the
1362 // jump. BranchRelaxation then needs to be made aware of the newly inserted
1363 // block.
1364 //
1365 // If a spill is needed for the pc register pair, we need to insert a spill
1366 // restore block right before the destination block, and insert a short branch
1367 // into the old destination block's fallthrough predecessor.
1368 // e.g.:
1369 //
1370 // s_cbranch_scc0 skip_long_branch:
1371 //
1372 // long_branch_bb:
1373 // spill s[8:9]
1374 // s_getpc_b64 s[8:9]
1375 // s_add_u32 s8, s8, restore_bb
1376 // s_addc_u32 s9, s9, 0
1377 // s_setpc_b64 s[8:9]
1378 //
1379 // skip_long_branch:
1380 // foo;
1381 //
1382 // .....
1383 //
1384 // dest_bb_fallthrough_predecessor:
1385 // bar;
1386 // s_branch dest_bb
1387 //
1388 // restore_bb:
1389 // restore s[8:9]
1390 // fallthrough dest_bb
1391 //
1392 // dest_bb:
1393 // buzz;
1394
1395 RS->enterBasicBlockEnd(MBB);
1396 unsigned Scav = RS->scavengeRegister(&AMDGPU::SReg_64RegClass,
1397 MachineBasicBlock::iterator(GetPC), 0);
1398 MRI.replaceRegWith(PCReg, Scav);
1399 MRI.clearVirtRegs();
1400 RS->setRegUsed(Scav);
1401
1402 return 4 + 8 + 4 + 4;
1403}
1404
Matt Arsenault6d093802016-05-21 00:29:27 +00001405unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
1406 switch (Cond) {
1407 case SIInstrInfo::SCC_TRUE:
1408 return AMDGPU::S_CBRANCH_SCC1;
1409 case SIInstrInfo::SCC_FALSE:
1410 return AMDGPU::S_CBRANCH_SCC0;
Matt Arsenault49459052016-05-21 00:29:40 +00001411 case SIInstrInfo::VCCNZ:
1412 return AMDGPU::S_CBRANCH_VCCNZ;
1413 case SIInstrInfo::VCCZ:
1414 return AMDGPU::S_CBRANCH_VCCZ;
1415 case SIInstrInfo::EXECNZ:
1416 return AMDGPU::S_CBRANCH_EXECNZ;
1417 case SIInstrInfo::EXECZ:
1418 return AMDGPU::S_CBRANCH_EXECZ;
Matt Arsenault6d093802016-05-21 00:29:27 +00001419 default:
1420 llvm_unreachable("invalid branch predicate");
1421 }
1422}
1423
1424SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
1425 switch (Opcode) {
1426 case AMDGPU::S_CBRANCH_SCC0:
1427 return SCC_FALSE;
1428 case AMDGPU::S_CBRANCH_SCC1:
1429 return SCC_TRUE;
Matt Arsenault49459052016-05-21 00:29:40 +00001430 case AMDGPU::S_CBRANCH_VCCNZ:
1431 return VCCNZ;
1432 case AMDGPU::S_CBRANCH_VCCZ:
1433 return VCCZ;
1434 case AMDGPU::S_CBRANCH_EXECNZ:
1435 return EXECNZ;
1436 case AMDGPU::S_CBRANCH_EXECZ:
1437 return EXECZ;
Matt Arsenault6d093802016-05-21 00:29:27 +00001438 default:
1439 return INVALID_BR;
1440 }
1441}
1442
Matt Arsenault6bc43d82016-10-06 16:20:41 +00001443bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB,
1444 MachineBasicBlock::iterator I,
1445 MachineBasicBlock *&TBB,
1446 MachineBasicBlock *&FBB,
1447 SmallVectorImpl<MachineOperand> &Cond,
1448 bool AllowModify) const {
Matt Arsenault6d093802016-05-21 00:29:27 +00001449 if (I->getOpcode() == AMDGPU::S_BRANCH) {
1450 // Unconditional Branch
1451 TBB = I->getOperand(0).getMBB();
1452 return false;
1453 }
1454
Jan Sjodina06bfe02017-05-15 20:18:37 +00001455 MachineBasicBlock *CondBB = nullptr;
Matt Arsenault6d093802016-05-21 00:29:27 +00001456
Jan Sjodina06bfe02017-05-15 20:18:37 +00001457 if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
1458 CondBB = I->getOperand(1).getMBB();
1459 Cond.push_back(I->getOperand(0));
1460 } else {
1461 BranchPredicate Pred = getBranchPredicate(I->getOpcode());
1462 if (Pred == INVALID_BR)
1463 return true;
Matt Arsenault6d093802016-05-21 00:29:27 +00001464
Jan Sjodina06bfe02017-05-15 20:18:37 +00001465 CondBB = I->getOperand(0).getMBB();
1466 Cond.push_back(MachineOperand::CreateImm(Pred));
1467 Cond.push_back(I->getOperand(1)); // Save the branch register.
1468 }
Matt Arsenault6d093802016-05-21 00:29:27 +00001469 ++I;
1470
1471 if (I == MBB.end()) {
1472 // Conditional branch followed by fall-through.
1473 TBB = CondBB;
1474 return false;
1475 }
1476
1477 if (I->getOpcode() == AMDGPU::S_BRANCH) {
1478 TBB = CondBB;
1479 FBB = I->getOperand(0).getMBB();
1480 return false;
1481 }
1482
1483 return true;
1484}
1485
Matt Arsenault6bc43d82016-10-06 16:20:41 +00001486bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
1487 MachineBasicBlock *&FBB,
1488 SmallVectorImpl<MachineOperand> &Cond,
1489 bool AllowModify) const {
1490 MachineBasicBlock::iterator I = MBB.getFirstTerminator();
1491 if (I == MBB.end())
1492 return false;
1493
1494 if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
1495 return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
1496
1497 ++I;
1498
1499 // TODO: Should be able to treat as fallthrough?
1500 if (I == MBB.end())
1501 return true;
1502
1503 if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
1504 return true;
1505
1506 MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();
1507
1508 // Specifically handle the case where the conditional branch is to the same
1509 // destination as the mask branch. e.g.
1510 //
1511 // si_mask_branch BB8
1512 // s_cbranch_execz BB8
1513 // s_cbranch BB9
1514 //
1515 // This is required to understand divergent loops which may need the branches
1516 // to be relaxed.
1517 if (TBB != MaskBrDest || Cond.empty())
1518 return true;
1519
1520 auto Pred = Cond[0].getImm();
1521 return (Pred != EXECZ && Pred != EXECNZ);
1522}
1523
Matt Arsenault1b9fc8e2016-09-14 20:43:16 +00001524unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001525 int *BytesRemoved) const {
Matt Arsenault6d093802016-05-21 00:29:27 +00001526 MachineBasicBlock::iterator I = MBB.getFirstTerminator();
1527
1528 unsigned Count = 0;
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001529 unsigned RemovedSize = 0;
Matt Arsenault6d093802016-05-21 00:29:27 +00001530 while (I != MBB.end()) {
1531 MachineBasicBlock::iterator Next = std::next(I);
Matt Arsenault6bc43d82016-10-06 16:20:41 +00001532 if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
1533 I = Next;
1534 continue;
1535 }
1536
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001537 RemovedSize += getInstSizeInBytes(*I);
Matt Arsenault6d093802016-05-21 00:29:27 +00001538 I->eraseFromParent();
1539 ++Count;
1540 I = Next;
1541 }
1542
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001543 if (BytesRemoved)
1544 *BytesRemoved = RemovedSize;
1545
Matt Arsenault6d093802016-05-21 00:29:27 +00001546 return Count;
1547}
1548
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001549// Copy the flags onto the implicit condition register operand.
1550static void preserveCondRegFlags(MachineOperand &CondReg,
1551 const MachineOperand &OrigCond) {
1552 CondReg.setIsUndef(OrigCond.isUndef());
1553 CondReg.setIsKill(OrigCond.isKill());
1554}
1555
Matt Arsenaulte8e0f5c2016-09-14 17:24:15 +00001556unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
Matt Arsenault6d093802016-05-21 00:29:27 +00001557 MachineBasicBlock *TBB,
1558 MachineBasicBlock *FBB,
1559 ArrayRef<MachineOperand> Cond,
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001560 const DebugLoc &DL,
1561 int *BytesAdded) const {
Matt Arsenault6d093802016-05-21 00:29:27 +00001562
1563 if (!FBB && Cond.empty()) {
1564 BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1565 .addMBB(TBB);
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001566 if (BytesAdded)
1567 *BytesAdded = 4;
Matt Arsenault6d093802016-05-21 00:29:27 +00001568 return 1;
1569 }
1570
Jan Sjodina06bfe02017-05-15 20:18:37 +00001571 if (Cond.size() == 1 && Cond[0].isReg()) {
1572 BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO))
1573 .add(Cond[0])
1574 .addMBB(TBB);
1575 return 1;
1576 }
1577
Matt Arsenault6d093802016-05-21 00:29:27 +00001578 assert(TBB && Cond[0].isImm());
1579
1580 unsigned Opcode
1581 = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
1582
1583 if (!FBB) {
1585 MachineInstr *CondBr =
1586 BuildMI(&MBB, DL, get(Opcode))
Matt Arsenault6d093802016-05-21 00:29:27 +00001587 .addMBB(TBB);
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001588
Matt Arsenault52f14ec2016-11-07 19:09:27 +00001589 // Copy the flags onto the implicit condition register operand.
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001590 preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);
Matt Arsenault52f14ec2016-11-07 19:09:27 +00001591
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001592 if (BytesAdded)
1593 *BytesAdded = 4;
Matt Arsenault6d093802016-05-21 00:29:27 +00001594 return 1;
1595 }
1596
1597 assert(TBB && FBB);
1598
Matt Arsenault52f14ec2016-11-07 19:09:27 +00001599 MachineInstr *CondBr =
1600 BuildMI(&MBB, DL, get(Opcode))
Matt Arsenault6d093802016-05-21 00:29:27 +00001601 .addMBB(TBB);
1602 BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1603 .addMBB(FBB);
1604
Matt Arsenault52f14ec2016-11-07 19:09:27 +00001605 preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);
1608
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001609 if (BytesAdded)
1610 *BytesAdded = 8;
1611
Matt Arsenault6d093802016-05-21 00:29:27 +00001612 return 2;
1613}
1614
Matt Arsenault1b9fc8e2016-09-14 20:43:16 +00001615bool SIInstrInfo::reverseBranchCondition(
Matt Arsenault72fcd5f2016-05-21 00:29:34 +00001616 SmallVectorImpl<MachineOperand> &Cond) const {
Jan Sjodina06bfe02017-05-15 20:18:37 +00001617 if (Cond.size() != 2) {
1618 return true;
1619 }
1620
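  // BranchPredicate encodes each predicate and its inverse as negated enum
  // values, so flipping the sign of the immediate reverses the condition.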
1621 if (Cond[0].isImm()) {
1622 Cond[0].setImm(-Cond[0].getImm());
1623 return false;
1624 }
1625
1626 return true;
Matt Arsenault72fcd5f2016-05-21 00:29:34 +00001627}
1628
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001629bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
1630 ArrayRef<MachineOperand> Cond,
1631 unsigned TrueReg, unsigned FalseReg,
1632 int &CondCycles,
1633 int &TrueCycles, int &FalseCycles) const {
1634 switch (Cond[0].getImm()) {
1635 case VCCNZ:
1636 case VCCZ: {
1637 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1638 const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
1639 assert(MRI.getRegClass(FalseReg) == RC);
1640
1641 int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
1642 CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
1643
1644 // Limit to equal cost for branch vs. N v_cndmask_b32s.
1645 return !RI.isSGPRClass(RC) && NumInsts <= 6;
1646 }
1647 case SCC_TRUE:
1648 case SCC_FALSE: {
1649 // FIXME: We could insert for VGPRs if we could replace the original compare
1650 // with a vector one.
1651 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1652 const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
1653 assert(MRI.getRegClass(FalseReg) == RC);
1654
1655 int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
1656
1657 // Register widths that are a multiple of 64 bits can use s_cselect_b64.
1658 if (NumInsts % 2 == 0)
1659 NumInsts /= 2;
1660
1661 CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
1662 return RI.isSGPRClass(RC);
1663 }
1664 default:
1665 return false;
1666 }
1667}
1668
1669void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
1670 MachineBasicBlock::iterator I, const DebugLoc &DL,
1671 unsigned DstReg, ArrayRef<MachineOperand> Cond,
1672 unsigned TrueReg, unsigned FalseReg) const {
1673 BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
1674 if (Pred == VCCZ || Pred == SCC_FALSE) {
1675 Pred = static_cast<BranchPredicate>(-Pred);
1676 std::swap(TrueReg, FalseReg);
1677 }
1678
1679 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1680 const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00001681 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001682
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00001683 if (DstSize == 32) {
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001684 unsigned SelOp = Pred == SCC_TRUE ?
1685 AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;
1686
1687 // Instruction's operands are backwards from what is expected.
1688 MachineInstr *Select =
1689 BuildMI(MBB, I, DL, get(SelOp), DstReg)
1690 .addReg(FalseReg)
1691 .addReg(TrueReg);
1692
1693 preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1694 return;
1695 }
1696
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00001697 if (DstSize == 64 && Pred == SCC_TRUE) {
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001698 MachineInstr *Select =
1699 BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
1700 .addReg(FalseReg)
1701 .addReg(TrueReg);
1702
1703 preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1704 return;
1705 }
1706
1707 static const int16_t Sub0_15[] = {
1708 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1709 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1710 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1711 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1712 };
1713
1714 static const int16_t Sub0_15_64[] = {
1715 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1716 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1717 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1718 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
1719 };
1720
1721 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
1722 const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass;
1723 const int16_t *SubIndices = Sub0_15;
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00001724 int NElts = DstSize / 32;
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001725
1726 // 64-bit select is only avaialble for SALU.
1727 if (Pred == SCC_TRUE) {
1728 SelOp = AMDGPU::S_CSELECT_B64;
1729 EltRC = &AMDGPU::SGPR_64RegClass;
1730 SubIndices = Sub0_15_64;
1731
1732 assert(NElts % 2 == 0);
1733 NElts /= 2;
1734 }
1735
1736 MachineInstrBuilder MIB = BuildMI(
1737 MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
1738
1739 I = MIB->getIterator();
1740
1741 SmallVector<unsigned, 8> Regs;
1742 for (int Idx = 0; Idx != NElts; ++Idx) {
1743 unsigned DstElt = MRI.createVirtualRegister(EltRC);
1744 Regs.push_back(DstElt);
1745
1746 unsigned SubIdx = SubIndices[Idx];
1747
1748 MachineInstr *Select =
1749 BuildMI(MBB, I, DL, get(SelOp), DstElt)
1750 .addReg(FalseReg, 0, SubIdx)
1751 .addReg(TrueReg, 0, SubIdx);
1752 preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1753
1754 MIB.addReg(DstElt)
1755 .addImm(SubIdx);
1756 }
1757}
1758
Sam Kolton27e0f8b2017-03-31 11:42:43 +00001759bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
1760 switch (MI.getOpcode()) {
1761 case AMDGPU::V_MOV_B32_e32:
1762 case AMDGPU::V_MOV_B32_e64:
1763 case AMDGPU::V_MOV_B64_PSEUDO: {
1764 // If there are additional implicit register operands, this may be used for
1765 // register indexing so the source register operand isn't simply copied.
1766 unsigned NumOps = MI.getDesc().getNumOperands() +
1767 MI.getDesc().getNumImplicitUses();
1768
1769 return MI.getNumOperands() == NumOps;
1770 }
1771 case AMDGPU::S_MOV_B32:
1772 case AMDGPU::S_MOV_B64:
1773 case AMDGPU::COPY:
1774 return true;
1775 default:
1776 return false;
1777 }
1778}
1779
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001780static void removeModOperands(MachineInstr &MI) {
1781 unsigned Opc = MI.getOpcode();
1782 int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1783 AMDGPU::OpName::src0_modifiers);
1784 int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1785 AMDGPU::OpName::src1_modifiers);
1786 int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1787 AMDGPU::OpName::src2_modifiers);
1788
1789 MI.RemoveOperand(Src2ModIdx);
1790 MI.RemoveOperand(Src1ModIdx);
1791 MI.RemoveOperand(Src0ModIdx);
1792}
1793
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001794bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001795 unsigned Reg, MachineRegisterInfo *MRI) const {
1796 if (!MRI->hasOneNonDBGUse(Reg))
1797 return false;
1798
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001799 unsigned Opc = UseMI.getOpcode();
Tom Stellard2add8a12016-09-06 20:00:26 +00001800 if (Opc == AMDGPU::COPY) {
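    // e.g. a COPY of a register defined by 'S_MOV_B32 7' is rewritten in
    // place into 'V_MOV_B32_e32 7' for a VGPR destination, or into an
    // 'S_MOV_B32 7' for an SGPR destination.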
1801 bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
1802 switch (DefMI.getOpcode()) {
1803 default:
1804 return false;
1805 case AMDGPU::S_MOV_B64:
1806 // TODO: We could fold 64-bit immediates, but this gets complicated
1807 // when there are sub-registers.
1808 return false;
1809
1810 case AMDGPU::V_MOV_B32_e32:
1811 case AMDGPU::S_MOV_B32:
1812 break;
1813 }
1814 unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
1815 const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
1816 assert(ImmOp);
1817 // FIXME: We could handle FrameIndex values here.
1818 if (!ImmOp->isImm()) {
1819 return false;
1820 }
1821 UseMI.setDesc(get(NewOpc));
1822 UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
1823 UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
1824 return true;
1825 }
1826
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00001827 if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
1828 Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
Matt Arsenault2ed21932017-02-27 20:21:31 +00001829 // Don't fold if we are using source or output modifiers. The new VOP2
1830 // instructions don't have them.
1831 if (hasAnyModifiersSet(UseMI))
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001832 return false;
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001833
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001834 const MachineOperand &ImmOp = DefMI.getOperand(1);
Matt Arsenault3d1c1de2016-04-14 21:58:24 +00001835
1836 // If this is a free constant, there's no reason to do this.
1837 // TODO: We could fold this here instead of letting SIFoldOperands do it
1838 // later.
Matt Arsenault4bd72362016-12-10 00:39:12 +00001839 MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);
1840
1841 // Any src operand can be used for the legality check.
1842 if (isInlineConstant(UseMI, *Src0, ImmOp))
Matt Arsenault3d1c1de2016-04-14 21:58:24 +00001843 return false;
1844
Matt Arsenault2ed21932017-02-27 20:21:31 +00001845 bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001846 MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
1847 MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001848
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00001849 // Multiplied part is the constant: Use v_madmk_{f16, f32}.
Matt Arsenaultf0783302015-02-21 21:29:10 +00001850 // We should only expect these to be on src0 due to canonicalizations.
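    // e.g. folding a moved constant K into the multiplied operand gives
    // v_madmk_{f16,f32} (x * K + y, with K emitted as a trailing 32-bit
    // literal); the added-constant case below uses v_madak (x * y + K).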
1851 if (Src0->isReg() && Src0->getReg() == Reg) {
Matt Arsenaulta266bd82016-03-02 04:05:14 +00001852 if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
Matt Arsenaultf0783302015-02-21 21:29:10 +00001853 return false;
1854
Matt Arsenaulta266bd82016-03-02 04:05:14 +00001855 if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
Matt Arsenaultf0783302015-02-21 21:29:10 +00001856 return false;
1857
Nikolay Haustov65607812016-03-11 09:27:25 +00001858 // We need to swap operands 0 and 1 since madmk constant is at operand 1.
Matt Arsenaultf0783302015-02-21 21:29:10 +00001859
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001860 const int64_t Imm = DefMI.getOperand(1).getImm();
Matt Arsenaultf0783302015-02-21 21:29:10 +00001861
1862 // FIXME: This would be a lot easier if we could return a new instruction
1863 // instead of having to modify in place.
1864
1865 // Remove these first since they are at the end.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001866 UseMI.RemoveOperand(
1867 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
1868 UseMI.RemoveOperand(
1869 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
Matt Arsenaultf0783302015-02-21 21:29:10 +00001870
1871 unsigned Src1Reg = Src1->getReg();
1872 unsigned Src1SubReg = Src1->getSubReg();
Matt Arsenaultf0783302015-02-21 21:29:10 +00001873 Src0->setReg(Src1Reg);
1874 Src0->setSubReg(Src1SubReg);
Matt Arsenault5e100162015-04-24 01:57:58 +00001875 Src0->setIsKill(Src1->isKill());
1876
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00001877 if (Opc == AMDGPU::V_MAC_F32_e64 ||
1878 Opc == AMDGPU::V_MAC_F16_e64)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001879 UseMI.untieRegOperand(
1880 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
Tom Stellarddb5a11f2015-07-13 15:47:57 +00001881
Nikolay Haustov65607812016-03-11 09:27:25 +00001882 Src1->ChangeToImmediate(Imm);
Matt Arsenaultf0783302015-02-21 21:29:10 +00001883
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001884 removeModOperands(UseMI);
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00001885 UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16));
Matt Arsenaultf0783302015-02-21 21:29:10 +00001886
1887 bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
1888 if (DeleteDef)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001889 DefMI.eraseFromParent();
Matt Arsenaultf0783302015-02-21 21:29:10 +00001890
1891 return true;
1892 }
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001893
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00001894 // Added part is the constant: Use v_madak_{f16, f32}.
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001895 if (Src2->isReg() && Src2->getReg() == Reg) {
1896 // Not allowed to use constant bus for another operand.
1897 // We can however allow an inline immediate as src0.
1898 if (!Src0->isImm() &&
1899 (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
1900 return false;
1901
Matt Arsenaulta266bd82016-03-02 04:05:14 +00001902 if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001903 return false;
1904
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001905 const int64_t Imm = DefMI.getOperand(1).getImm();
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001906
1907 // FIXME: This would be a lot easier if we could return a new instruction
1908 // instead of having to modify in place.
1909
1910 // Remove these first since they are at the end.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001911 UseMI.RemoveOperand(
1912 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
1913 UseMI.RemoveOperand(
1914 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001915
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00001916 if (Opc == AMDGPU::V_MAC_F32_e64 ||
1917 Opc == AMDGPU::V_MAC_F16_e64)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001918 UseMI.untieRegOperand(
1919 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
Tom Stellarddb5a11f2015-07-13 15:47:57 +00001920
1921 // ChangingToImmediate adds Src2 back to the instruction.
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001922 Src2->ChangeToImmediate(Imm);
1923
1924 // These come before src2.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001925 removeModOperands(UseMI);
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00001926 UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16));
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001927
1928 bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
1929 if (DeleteDef)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001930 DefMI.eraseFromParent();
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001931
1932 return true;
1933 }
1934 }
1935
1936 return false;
1937}
1938
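// Returns true when the two accesses [OffsetA, OffsetA + WidthA) and
// [OffsetB, OffsetB + WidthB) are disjoint, e.g. a 4-byte access at offset 0
// and an 8-byte access at offset 4 do not overlap (0 + 4 <= 4).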
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00001939static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
1940 int WidthB, int OffsetB) {
1941 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1942 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1943 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1944 return LowOffset + LowWidth <= HighOffset;
1945}
1946
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001947bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
1948 MachineInstr &MIb) const {
Chad Rosierc27a18f2016-03-09 16:00:35 +00001949 unsigned BaseReg0, BaseReg1;
1950 int64_t Offset0, Offset1;
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00001951
Sanjoy Dasb666ea32015-06-15 18:44:14 +00001952 if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
1953 getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
Tom Stellardcb6ba622016-04-30 00:23:06 +00001954
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001955 if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) {
Tom Stellardcb6ba622016-04-30 00:23:06 +00001956 // FIXME: Handle ds_read2 / ds_write2.
1957 return false;
1958 }
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001959 unsigned Width0 = (*MIa.memoperands_begin())->getSize();
1960 unsigned Width1 = (*MIb.memoperands_begin())->getSize();
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00001961 if (BaseReg0 == BaseReg1 &&
1962 offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
1963 return true;
1964 }
1965 }
1966
1967 return false;
1968}
1969
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001970bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa,
1971 MachineInstr &MIb,
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00001972 AliasAnalysis *AA) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001973 assert((MIa.mayLoad() || MIa.mayStore()) &&
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00001974 "MIa must load from or modify a memory location");
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001975 assert((MIb.mayLoad() || MIb.mayStore()) &&
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00001976 "MIb must load from or modify a memory location");
1977
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001978 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects())
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00001979 return false;
1980
1981 // XXX - Can we relax this between address spaces?
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001982 if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00001983 return false;
1984
Tom Stellard662f3302016-08-29 12:05:32 +00001985 if (AA && MIa.hasOneMemOperand() && MIb.hasOneMemOperand()) {
1986 const MachineMemOperand *MMOa = *MIa.memoperands_begin();
1987 const MachineMemOperand *MMOb = *MIb.memoperands_begin();
1988 if (MMOa->getValue() && MMOb->getValue()) {
1989 MemoryLocation LocA(MMOa->getValue(), MMOa->getSize(), MMOa->getAAInfo());
1990 MemoryLocation LocB(MMOb->getValue(), MMOb->getSize(), MMOb->getAAInfo());
1991 if (!AA->alias(LocA, LocB))
1992 return true;
1993 }
1994 }
1995
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00001996 // TODO: Should we check the address space from the MachineMemOperand? That
1997 // would allow us to distinguish objects we know don't alias based on the
Benjamin Kramerdf005cb2015-08-08 18:27:36 +00001998 // underlying address space, even if it was lowered to a different one,
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00001999 // e.g. private accesses lowered to use MUBUF instructions on a scratch
2000 // buffer.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002001 if (isDS(MIa)) {
2002 if (isDS(MIb))
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002003 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2004
Matt Arsenault9608a2892017-07-29 01:26:21 +00002005 return !isFLAT(MIb) || isSegmentSpecificFLAT(MIb);
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002006 }
2007
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002008 if (isMUBUF(MIa) || isMTBUF(MIa)) {
2009 if (isMUBUF(MIb) || isMTBUF(MIb))
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002010 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2011
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002012 return !isFLAT(MIb) && !isSMRD(MIb);
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002013 }
2014
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002015 if (isSMRD(MIa)) {
2016 if (isSMRD(MIb))
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002017 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2018
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002019 return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(MIa);
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002020 }
2021
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002022 if (isFLAT(MIa)) {
2023 if (isFLAT(MIb))
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002024 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2025
2026 return false;
2027 }
2028
2029 return false;
2030}
2031
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002032MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002033 MachineInstr &MI,
2034 LiveVariables *LV) const {
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002035 bool IsF16 = false;
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002036
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002037 switch (MI.getOpcode()) {
2038 default:
2039 return nullptr;
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002040 case AMDGPU::V_MAC_F16_e64:
2041 IsF16 = true;
Simon Pilgrim0f5b3502017-07-07 10:18:57 +00002042 LLVM_FALLTHROUGH;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002043 case AMDGPU::V_MAC_F32_e64:
2044 break;
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002045 case AMDGPU::V_MAC_F16_e32:
2046 IsF16 = true;
Simon Pilgrim0f5b3502017-07-07 10:18:57 +00002047 LLVM_FALLTHROUGH;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002048 case AMDGPU::V_MAC_F32_e32: {
Matt Arsenault4bd72362016-12-10 00:39:12 +00002049 int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
2050 AMDGPU::OpName::src0);
2051 const MachineOperand *Src0 = &MI.getOperand(Src0Idx);
2052 if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002053 return nullptr;
2054 break;
2055 }
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002056 }
2057
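  // e.g. 'v_mac_f32 dst, a, b' (dst += a * b, with dst tied to src2) is
  // rebuilt as 'v_mad_f32 dst, a, b, dst', which drops the tied-operand
  // constraint.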
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002058 const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
2059 const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002060 const MachineOperand *Src0Mods =
2061 getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002062 const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002063 const MachineOperand *Src1Mods =
2064 getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002065 const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002066 const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
2067 const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002068
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002069 return BuildMI(*MBB, MI, MI.getDebugLoc(),
2070 get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
Diana Picus116bbab2017-01-13 09:58:52 +00002071 .add(*Dst)
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002072 .addImm(Src0Mods ? Src0Mods->getImm() : 0)
Diana Picus116bbab2017-01-13 09:58:52 +00002073 .add(*Src0)
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002074 .addImm(Src1Mods ? Src1Mods->getImm() : 0)
Diana Picus116bbab2017-01-13 09:58:52 +00002075 .add(*Src1)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002076 .addImm(0) // src2_modifiers
Diana Picus116bbab2017-01-13 09:58:52 +00002077 .add(*Src2)
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002078 .addImm(Clamp ? Clamp->getImm() : 0)
2079 .addImm(Omod ? Omod->getImm() : 0);
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002080}
2081
Matt Arsenaultd486d3f2016-10-12 18:49:05 +00002082// It's not generally safe to move VALU instructions across these since it will
2083// start using the register as a base index rather than directly.
2084// XXX - Why isn't hasSideEffects sufficient for these?
2085static bool changesVGPRIndexingMode(const MachineInstr &MI) {
2086 switch (MI.getOpcode()) {
2087 case AMDGPU::S_SET_GPR_IDX_ON:
2088 case AMDGPU::S_SET_GPR_IDX_MODE:
2089 case AMDGPU::S_SET_GPR_IDX_OFF:
2090 return true;
2091 default:
2092 return false;
2093 }
2094}
2095
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002096bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
Nicolai Haehnle213e87f2016-03-21 20:28:33 +00002097 const MachineBasicBlock *MBB,
2098 const MachineFunction &MF) const {
Matt Arsenault95c78972016-07-09 01:13:51 +00002099 // XXX - Do we want the SP check in the base implementation?
2100
Nicolai Haehnle213e87f2016-03-21 20:28:33 +00002101 // Target-independent instructions do not have an implicit-use of EXEC, even
2102 // when they operate on VGPRs. Treating EXEC modifications as scheduling
2103 // boundaries prevents incorrect movements of such instructions.
Matt Arsenault95c78972016-07-09 01:13:51 +00002104 return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) ||
Matt Arsenaultd486d3f2016-10-12 18:49:05 +00002105 MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
Tom Stellard8485fa02016-12-07 02:42:15 +00002106 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
2107 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
Matt Arsenaultd486d3f2016-10-12 18:49:05 +00002108 changesVGPRIndexingMode(MI);
Nicolai Haehnle213e87f2016-03-21 20:28:33 +00002109}
2110
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00002111bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
Matt Arsenault26faed32016-12-05 22:26:17 +00002112 switch (Imm.getBitWidth()) {
2113 case 32:
2114 return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(),
2115 ST.hasInv2PiInlineImm());
2116 case 64:
2117 return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(),
2118 ST.hasInv2PiInlineImm());
Matt Arsenault4bd72362016-12-10 00:39:12 +00002119 case 16:
Matt Arsenault9dba9bd2017-02-02 02:27:04 +00002120 return ST.has16BitInsts() &&
2121 AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
Matt Arsenault4bd72362016-12-10 00:39:12 +00002122 ST.hasInv2PiInlineImm());
Matt Arsenault26faed32016-12-05 22:26:17 +00002123 default:
2124 llvm_unreachable("invalid bitwidth");
Matt Arsenault303011a2014-12-17 21:04:08 +00002125 }
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00002126}
2127
Matt Arsenault11a4d672015-02-13 19:05:03 +00002128bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
Matt Arsenault4bd72362016-12-10 00:39:12 +00002129 uint8_t OperandType) const {
Sam Kolton549c89d2017-06-21 08:53:38 +00002130 if (!MO.isImm() ||
2131 OperandType < AMDGPU::OPERAND_SRC_FIRST ||
2132 OperandType > AMDGPU::OPERAND_SRC_LAST)
Matt Arsenault4bd72362016-12-10 00:39:12 +00002133 return false;
2134
2135 // MachineOperand provides no way to tell the true operand size, since it only
2136 // records a 64-bit value. We need to know the size to determine if a 32-bit
2137 // floating point immediate bit pattern is legal for an integer immediate. It
2138 // would be for any 32-bit integer operand, but would not be for a 64-bit one.
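 // For example, 0x3f800000 (the bit pattern of 1.0f) is an inline constant
 // for a 32-bit operand but only an ordinary literal for a 64-bit one.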
2139
2140 int64_t Imm = MO.getImm();
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002141 switch (OperandType) {
2142 case AMDGPU::OPERAND_REG_IMM_INT32:
2143 case AMDGPU::OPERAND_REG_IMM_FP32:
2144 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2145 case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
Matt Arsenault4bd72362016-12-10 00:39:12 +00002146 int32_t Trunc = static_cast<int32_t>(Imm);
2147 return Trunc == Imm &&
2148 AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
Matt Arsenault11a4d672015-02-13 19:05:03 +00002149 }
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002150 case AMDGPU::OPERAND_REG_IMM_INT64:
2151 case AMDGPU::OPERAND_REG_IMM_FP64:
2152 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2153 case AMDGPU::OPERAND_REG_INLINE_C_FP64: {
Matt Arsenault4bd72362016-12-10 00:39:12 +00002154 return AMDGPU::isInlinableLiteral64(MO.getImm(),
2155 ST.hasInv2PiInlineImm());
2156 }
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002157 case AMDGPU::OPERAND_REG_IMM_INT16:
2158 case AMDGPU::OPERAND_REG_IMM_FP16:
2159 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2160 case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
Matt Arsenault4bd72362016-12-10 00:39:12 +00002161 if (isInt<16>(Imm) || isUInt<16>(Imm)) {
Matt Arsenault9dba9bd2017-02-02 02:27:04 +00002162 // A few special case instructions have 16-bit operands on subtargets
2163 // where 16-bit instructions are not legal.
2164 // TODO: Do the 32-bit immediates work? We shouldn't really need to handle
2165 // constants in these cases
Matt Arsenault4bd72362016-12-10 00:39:12 +00002166 int16_t Trunc = static_cast<int16_t>(Imm);
Matt Arsenault9dba9bd2017-02-02 02:27:04 +00002167 return ST.has16BitInsts() &&
2168 AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
Matt Arsenault4bd72362016-12-10 00:39:12 +00002169 }
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00002170
Matt Arsenault4bd72362016-12-10 00:39:12 +00002171 return false;
2172 }
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002173 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2174 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
2175 uint32_t Trunc = static_cast<uint32_t>(Imm);
2176 return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
2177 }
Matt Arsenault4bd72362016-12-10 00:39:12 +00002178 default:
2179 llvm_unreachable("invalid bitwidth");
2180 }
Tom Stellard93fabce2013-10-10 17:11:55 +00002181}
2182
Matt Arsenaultc1ebd822016-08-13 01:43:54 +00002183bool SIInstrInfo::isLiteralConstantLike(const MachineOperand &MO,
Matt Arsenault4bd72362016-12-10 00:39:12 +00002184 const MCOperandInfo &OpInfo) const {
Matt Arsenaultc1ebd822016-08-13 01:43:54 +00002185 switch (MO.getType()) {
2186 case MachineOperand::MO_Register:
2187 return false;
2188 case MachineOperand::MO_Immediate:
Matt Arsenault4bd72362016-12-10 00:39:12 +00002189 return !isInlineConstant(MO, OpInfo);
Matt Arsenaultc1ebd822016-08-13 01:43:54 +00002190 case MachineOperand::MO_FrameIndex:
2191 case MachineOperand::MO_MachineBasicBlock:
2192 case MachineOperand::MO_ExternalSymbol:
2193 case MachineOperand::MO_GlobalAddress:
2194 case MachineOperand::MO_MCSymbol:
2195 return true;
2196 default:
2197 llvm_unreachable("unexpected operand type");
2198 }
2199}
2200
Matt Arsenaultbecb1402014-06-23 18:28:31 +00002201static bool compareMachineOp(const MachineOperand &Op0,
2202 const MachineOperand &Op1) {
2203 if (Op0.getType() != Op1.getType())
2204 return false;
2205
2206 switch (Op0.getType()) {
2207 case MachineOperand::MO_Register:
2208 return Op0.getReg() == Op1.getReg();
2209 case MachineOperand::MO_Immediate:
2210 return Op0.getImm() == Op1.getImm();
Matt Arsenaultbecb1402014-06-23 18:28:31 +00002211 default:
2212 llvm_unreachable("Didn't expect to be comparing these operand types");
2213 }
2214}
2215
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002216bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
2217 const MachineOperand &MO) const {
2218 const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo];
Tom Stellardb02094e2014-07-21 15:45:01 +00002219
Tom Stellardfb77f002015-01-13 22:59:41 +00002220 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
Tom Stellardb02094e2014-07-21 15:45:01 +00002221
2222 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
2223 return true;
2224
2225 if (OpInfo.RegClass < 0)
2226 return false;
2227
Matt Arsenault4bd72362016-12-10 00:39:12 +00002228 if (MO.isImm() && isInlineConstant(MO, OpInfo))
2229 return RI.opCanUseInlineConstant(OpInfo.OperandType);
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002230
Matt Arsenault4bd72362016-12-10 00:39:12 +00002231 return RI.opCanUseLiteralConstant(OpInfo.OperandType);
Tom Stellardb02094e2014-07-21 15:45:01 +00002232}
2233
Tom Stellard86d12eb2014-08-01 00:32:28 +00002234bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
Marek Olsaka93603d2015-01-15 18:42:51 +00002235 int Op32 = AMDGPU::getVOPe32(Opcode);
2236 if (Op32 == -1)
2237 return false;
2238
2239 return pseudoToMCOpcode(Op32) != -1;
Tom Stellard86d12eb2014-08-01 00:32:28 +00002240}
2241
Tom Stellardb4a313a2014-08-01 00:32:39 +00002242bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
2243 // The src0_modifier operand is present on all instructions
2244 // that have modifiers.
2245
2246 return AMDGPU::getNamedOperandIdx(Opcode,
2247 AMDGPU::OpName::src0_modifiers) != -1;
2248}
2249
Matt Arsenaultace5b762014-10-17 18:00:43 +00002250bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
2251 unsigned OpName) const {
2252 const MachineOperand *Mods = getNamedOperand(MI, OpName);
2253 return Mods && Mods->getImm();
2254}
2255
Matt Arsenault2ed21932017-02-27 20:21:31 +00002256bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
2257 return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
2258 hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
2259 hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) ||
2260 hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
2261 hasModifiersSet(MI, AMDGPU::OpName::omod);
2262}
2263
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002264bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
Matt Arsenault11a4d672015-02-13 19:05:03 +00002265 const MachineOperand &MO,
Matt Arsenault4bd72362016-12-10 00:39:12 +00002266 const MCOperandInfo &OpInfo) const {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002267 // Literal constants use the constant bus.
Matt Arsenault4bd72362016-12-10 00:39:12 +00002268 //if (isLiteralConstantLike(MO, OpInfo))
2269 // return true;
2270 if (MO.isImm())
2271 return !isInlineConstant(MO, OpInfo);
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002272
Matt Arsenault4bd72362016-12-10 00:39:12 +00002273 if (!MO.isReg())
2274 return true; // Misc other operands like FrameIndex
2275
2276 if (!MO.isUse())
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002277 return false;
2278
2279 if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
2280 return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
2281
2282 // FLAT_SCR is just an SGPR pair.
2283 if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
2284 return true;
2285
2286 // EXEC register uses the constant bus.
2287 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
2288 return true;
2289
2290 // SGPRs use the constant bus
Matt Arsenault8226fc42016-03-02 23:00:21 +00002291 return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
2292 (!MO.isImplicit() &&
2293 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
2294 AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002295}
2296
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00002297static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
2298 for (const MachineOperand &MO : MI.implicit_operands()) {
2299 // We only care about reads.
2300 if (MO.isDef())
2301 continue;
2302
2303 switch (MO.getReg()) {
2304 case AMDGPU::VCC:
2305 case AMDGPU::M0:
2306 case AMDGPU::FLAT_SCR:
2307 return MO.getReg();
2308
2309 default:
2310 break;
2311 }
2312 }
2313
2314 return AMDGPU::NoRegister;
2315}
2316
Matt Arsenault529cf252016-06-23 01:26:16 +00002317static bool shouldReadExec(const MachineInstr &MI) {
2318 if (SIInstrInfo::isVALU(MI)) {
2319 switch (MI.getOpcode()) {
2320 case AMDGPU::V_READLANE_B32:
2321 case AMDGPU::V_READLANE_B32_si:
2322 case AMDGPU::V_READLANE_B32_vi:
2323 case AMDGPU::V_WRITELANE_B32:
2324 case AMDGPU::V_WRITELANE_B32_si:
2325 case AMDGPU::V_WRITELANE_B32_vi:
2326 return false;
2327 }
2328
2329 return true;
2330 }
2331
2332 if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
2333 SIInstrInfo::isSALU(MI) ||
2334 SIInstrInfo::isSMRD(MI))
2335 return false;
2336
2337 return true;
2338}
2339
Matt Arsenaultcb540bc2016-07-19 00:35:03 +00002340static bool isSubRegOf(const SIRegisterInfo &TRI,
2341 const MachineOperand &SuperVec,
2342 const MachineOperand &SubReg) {
2343 if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg()))
2344 return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());
2345
2346 return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
2347 SubReg.getReg() == SuperVec.getReg();
2348}
2349
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002350bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
Tom Stellard93fabce2013-10-10 17:11:55 +00002351 StringRef &ErrInfo) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002352 uint16_t Opcode = MI.getOpcode();
Tom Stellarddde28a82017-05-26 16:40:03 +00002353 if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
2354 return true;
2355
Matt Arsenault89ad17c2017-06-12 16:37:55 +00002356 const MachineFunction *MF = MI.getParent()->getParent();
2357 const MachineRegisterInfo &MRI = MF->getRegInfo();
2358
Tom Stellard93fabce2013-10-10 17:11:55 +00002359 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2360 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2361 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2362
Tom Stellardca700e42014-03-17 17:03:49 +00002363 // Make sure the number of operands is correct.
2364 const MCInstrDesc &Desc = get(Opcode);
2365 if (!Desc.isVariadic() &&
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002366 Desc.getNumOperands() != MI.getNumExplicitOperands()) {
2367 ErrInfo = "Instruction has wrong number of operands.";
2368 return false;
Tom Stellardca700e42014-03-17 17:03:49 +00002369 }
2370
Matt Arsenault3d463192016-11-01 22:55:07 +00002371 if (MI.isInlineAsm()) {
2372 // Verify register classes for inlineasm constraints.
2373 for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands();
2374 I != E; ++I) {
2375 const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI);
2376 if (!RC)
2377 continue;
2378
2379 const MachineOperand &Op = MI.getOperand(I);
2380 if (!Op.isReg())
2381 continue;
2382
2383 unsigned Reg = Op.getReg();
2384 if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
2385 ErrInfo = "inlineasm operand has incorrect register class.";
2386 return false;
2387 }
2388 }
2389
2390 return true;
2391 }
2392
Changpeng Fangc9963932015-12-18 20:04:28 +00002393 // Make sure the register classes are correct.
Tom Stellardb4a313a2014-08-01 00:32:39 +00002394 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002395 if (MI.getOperand(i).isFPImm()) {
Tom Stellardfb77f002015-01-13 22:59:41 +00002396 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
2397 "all fp values to integers.";
2398 return false;
2399 }
2400
Marek Olsak8eeebcc2015-02-18 22:12:41 +00002401 int RegClass = Desc.OpInfo[i].RegClass;
2402
Tom Stellardca700e42014-03-17 17:03:49 +00002403 switch (Desc.OpInfo[i].OperandType) {
Tom Stellard1106b1c2015-01-20 17:49:41 +00002404 case MCOI::OPERAND_REGISTER:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002405 if (MI.getOperand(i).isImm()) {
Tom Stellard1106b1c2015-01-20 17:49:41 +00002406 ErrInfo = "Illegal immediate value for operand.";
2407 return false;
2408 }
2409 break;
Matt Arsenault4bd72362016-12-10 00:39:12 +00002410 case AMDGPU::OPERAND_REG_IMM_INT32:
2411 case AMDGPU::OPERAND_REG_IMM_FP32:
Tom Stellard1106b1c2015-01-20 17:49:41 +00002412 break;
Matt Arsenault4bd72362016-12-10 00:39:12 +00002413 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2414 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2415 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2416 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2417 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2418 case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
2419 const MachineOperand &MO = MI.getOperand(i);
2420 if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) {
Marek Olsak8eeebcc2015-02-18 22:12:41 +00002421 ErrInfo = "Illegal immediate value for operand.";
2422 return false;
Tom Stellarda305f932014-07-02 20:53:44 +00002423 }
Tom Stellardca700e42014-03-17 17:03:49 +00002424 break;
Matt Arsenault4bd72362016-12-10 00:39:12 +00002425 }
Tom Stellardca700e42014-03-17 17:03:49 +00002426 case MCOI::OPERAND_IMMEDIATE:
Matt Arsenaultffc82752016-07-05 17:09:01 +00002427 case AMDGPU::OPERAND_KIMM32:
Tom Stellardb02094e2014-07-21 15:45:01 +00002428 // Check if this operand is an immediate.
2429 // FrameIndex operands will be replaced by immediates, so they are
2430 // allowed.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002431 if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
Tom Stellardca700e42014-03-17 17:03:49 +00002432 ErrInfo = "Expected immediate, but got non-immediate";
2433 return false;
2434 }
Justin Bognerb03fd122016-08-17 05:10:15 +00002435 LLVM_FALLTHROUGH;
Tom Stellardca700e42014-03-17 17:03:49 +00002436 default:
2437 continue;
2438 }
2439
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002440 if (!MI.getOperand(i).isReg())
Tom Stellardca700e42014-03-17 17:03:49 +00002441 continue;
2442
Tom Stellardca700e42014-03-17 17:03:49 +00002443 if (RegClass != -1) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002444 unsigned Reg = MI.getOperand(i).getReg();
Matt Arsenault1322b6f2016-07-09 01:13:56 +00002445 if (Reg == AMDGPU::NoRegister ||
2446 TargetRegisterInfo::isVirtualRegister(Reg))
Tom Stellardca700e42014-03-17 17:03:49 +00002447 continue;
2448
2449 const TargetRegisterClass *RC = RI.getRegClass(RegClass);
2450 if (!RC->contains(Reg)) {
2451 ErrInfo = "Operand has incorrect register class.";
2452 return false;
2453 }
2454 }
2455 }
2456
Sam Kolton549c89d2017-06-21 08:53:38 +00002457 // Verify SDWA
2458 if (isSDWA(MI)) {
2459
2460 if (!ST.hasSDWA()) {
2461 ErrInfo = "SDWA is not supported on this target";
2462 return false;
2463 }
2464
2465 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
Sam Kolton549c89d2017-06-21 08:53:38 +00002466
2467 const int OpIndices[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
2468
2469 for (int OpIdx : OpIndices) {
2470 if (OpIdx == -1)
2471 continue;
2472 const MachineOperand &MO = MI.getOperand(OpIdx);
2473
Sam Kolton3c4933f2017-06-22 06:26:41 +00002474 if (!ST.hasSDWAScalar()) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002475 // Only VGPRS on VI
2476 if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
2477 ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
2478 return false;
2479 }
2480 } else {
2481 // No immediates on GFX9
2482 if (!MO.isReg()) {
2483 ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9";
2484 return false;
2485 }
2486 }
2487 }
2488
Sam Kolton3c4933f2017-06-22 06:26:41 +00002489 if (!ST.hasSDWAOmod()) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002490 // No omod allowed on VI
2491 const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
2492 if (OMod != nullptr &&
2493 (!OMod->isImm() || OMod->getImm() != 0)) {
2494 ErrInfo = "OMod not allowed in SDWA instructions on VI";
2495 return false;
2496 }
2497 }
2498
2499 uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode);
2500 if (isVOPC(BasicOpcode)) {
Sam Kolton3c4933f2017-06-22 06:26:41 +00002501 if (!ST.hasSDWASdst() && DstIdx != -1) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002502 // Only vcc allowed as dst on VI for VOPC
2503 const MachineOperand &Dst = MI.getOperand(DstIdx);
2504 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
2505 ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
2506 return false;
2507 }
Sam Koltona179d252017-06-27 15:02:23 +00002508 } else if (!ST.hasSDWAOutModsVOPC()) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002509 // No clamp allowed on GFX9 for VOPC
2510 const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
Sam Koltona179d252017-06-27 15:02:23 +00002511 if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002512 ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
2513 return false;
2514 }
Sam Koltona179d252017-06-27 15:02:23 +00002515
2516 // No omod allowed on GFX9 for VOPC
2517 const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
2518 if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
2519 ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
2520 return false;
2521 }
Sam Kolton549c89d2017-06-21 08:53:38 +00002522 }
2523 }
2524 }
2525
Tom Stellard93fabce2013-10-10 17:11:55 +00002526 // Verify VOP*
Sam Kolton549c89d2017-06-21 08:53:38 +00002527 if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI)) {
Matt Arsenaulte368cb32014-12-11 23:37:32 +00002528 // Only look at the true operands. Only a real operand can use the constant
2529 // bus, and we don't want to check pseudo-operands like the source modifier
2530 // flags.
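  // Illustrative example (added): an SGPR source, a literal constant, and an
  // implicit VCC read each count as a constant bus use, so at most one of
  // them may appear in a single VOP instruction on these targets.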
2531 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2532
Tom Stellard93fabce2013-10-10 17:11:55 +00002533 unsigned ConstantBusCount = 0;
Matt Arsenaultffc82752016-07-05 17:09:01 +00002534
2535 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
2536 ++ConstantBusCount;
2537
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002538 unsigned SGPRUsed = findImplicitSGPRRead(MI);
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00002539 if (SGPRUsed != AMDGPU::NoRegister)
2540 ++ConstantBusCount;
2541
Matt Arsenaulte368cb32014-12-11 23:37:32 +00002542 for (int OpIdx : OpIndices) {
2543 if (OpIdx == -1)
2544 break;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002545 const MachineOperand &MO = MI.getOperand(OpIdx);
Matt Arsenault4bd72362016-12-10 00:39:12 +00002546 if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002547 if (MO.isReg()) {
2548 if (MO.getReg() != SGPRUsed)
Tom Stellard93fabce2013-10-10 17:11:55 +00002549 ++ConstantBusCount;
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002550 SGPRUsed = MO.getReg();
2551 } else {
2552 ++ConstantBusCount;
Tom Stellard93fabce2013-10-10 17:11:55 +00002553 }
2554 }
Tom Stellard93fabce2013-10-10 17:11:55 +00002555 }
2556 if (ConstantBusCount > 1) {
2557 ErrInfo = "VOP* instruction uses the constant bus more than once";
2558 return false;
2559 }
2560 }
2561
Matt Arsenaultbecb1402014-06-23 18:28:31 +00002562 // Verify misc. restrictions on specific instructions.
2563 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
2564 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002565 const MachineOperand &Src0 = MI.getOperand(Src0Idx);
2566 const MachineOperand &Src1 = MI.getOperand(Src1Idx);
2567 const MachineOperand &Src2 = MI.getOperand(Src2Idx);
Matt Arsenaultbecb1402014-06-23 18:28:31 +00002568 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
2569 if (!compareMachineOp(Src0, Src1) &&
2570 !compareMachineOp(Src0, Src2)) {
2571 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
2572 return false;
2573 }
2574 }
2575 }
2576
Matt Arsenault7ccf6cd2016-09-16 21:41:16 +00002577 if (isSOPK(MI)) {
2578 int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
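    // Added note: zero-extended SOPK forms accept immediates in 0..65535,
    // while sign-extended forms accept -32768..32767.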
2579 if (sopkIsZext(MI)) {
2580 if (!isUInt<16>(Imm)) {
2581 ErrInfo = "invalid immediate for SOPK instruction";
2582 return false;
2583 }
2584 } else {
2585 if (!isInt<16>(Imm)) {
2586 ErrInfo = "invalid immediate for SOPK instruction";
2587 return false;
2588 }
2589 }
2590 }
2591
Matt Arsenaultcb540bc2016-07-19 00:35:03 +00002592 if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
2593 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
2594 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
2595 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
2596 const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
2597 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
2598
2599 const unsigned StaticNumOps = Desc.getNumOperands() +
2600 Desc.getNumImplicitUses();
2601 const unsigned NumImplicitOps = IsDst ? 2 : 1;
2602
Nicolai Haehnle368972c2016-11-02 17:03:11 +00002603 // Allow additional implicit operands. This allows a fixup done by the post
2604 // RA scheduler where the main implicit operand is killed and implicit-defs
2605 // are added for sub-registers that remain live after this instruction.
2606 if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
Matt Arsenaultcb540bc2016-07-19 00:35:03 +00002607 ErrInfo = "missing implicit register operands";
2608 return false;
2609 }
2610
2611 const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
2612 if (IsDst) {
2613 if (!Dst->isUse()) {
2614 ErrInfo = "v_movreld_b32 vdst should be a use operand";
2615 return false;
2616 }
2617
2618 unsigned UseOpIdx;
2619 if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
2620 UseOpIdx != StaticNumOps + 1) {
2621 ErrInfo = "movrel implicit operands should be tied";
2622 return false;
2623 }
2624 }
2625
2626 const MachineOperand &Src0 = MI.getOperand(Src0Idx);
2627 const MachineOperand &ImpUse
2628 = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
2629 if (!ImpUse.isReg() || !ImpUse.isUse() ||
2630 !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
2631 ErrInfo = "src0 should be subreg of implicit vector use";
2632 return false;
2633 }
2634 }
2635
Matt Arsenaultd092a062015-10-02 18:58:37 +00002636 // Make sure we aren't losing exec uses in the td files. This mostly requires
2637 // being careful when using let Uses to try to add other use registers.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002638 if (shouldReadExec(MI)) {
2639 if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
Matt Arsenaultd092a062015-10-02 18:58:37 +00002640 ErrInfo = "VALU instruction does not implicitly read exec mask";
2641 return false;
2642 }
2643 }
2644
Matt Arsenault7b647552016-10-28 21:55:15 +00002645 if (isSMRD(MI)) {
2646 if (MI.mayStore()) {
2647 // The register offset form of scalar stores may only use m0 as the
2648 // soffset register.
2649 const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff);
2650 if (Soff && Soff->getReg() != AMDGPU::M0) {
2651 ErrInfo = "scalar stores must use m0 as offset register";
2652 return false;
2653 }
2654 }
2655 }
2656
Matt Arsenault89ad17c2017-06-12 16:37:55 +00002657 if (isFLAT(MI) && !MF->getSubtarget<SISubtarget>().hasFlatInstOffsets()) {
2658 const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
2659 if (Offset->getImm() != 0) {
2660 ErrInfo = "subtarget does not support offsets in flat instructions";
2661 return false;
2662 }
2663 }
2664
Tom Stellard93fabce2013-10-10 17:11:55 +00002665 return true;
2666}
2667
Matt Arsenaultf14032a2013-11-15 22:02:28 +00002668unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
Tom Stellard82166022013-11-13 23:36:37 +00002669 switch (MI.getOpcode()) {
2670 default: return AMDGPU::INSTRUCTION_LIST_END;
2671 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
2672 case AMDGPU::COPY: return AMDGPU::COPY;
2673 case AMDGPU::PHI: return AMDGPU::PHI;
Tom Stellard204e61b2014-04-07 19:45:45 +00002674 case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
Connor Abbott8c217d02017-08-04 18:36:49 +00002675 case AMDGPU::WQM: return AMDGPU::WQM;
Connor Abbott92638ab2017-08-04 18:36:52 +00002676 case AMDGPU::WWM: return AMDGPU::WWM;
Tom Stellarde0387202014-03-21 15:51:54 +00002677 case AMDGPU::S_MOV_B32:
2678 return MI.getOperand(1).isReg() ?
Tom Stellard8c12fd92014-03-24 16:12:34 +00002679 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
Tom Stellard80942a12014-09-05 14:07:59 +00002680 case AMDGPU::S_ADD_I32:
2681 case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
Matt Arsenault43b8e4e2013-11-18 20:09:29 +00002682 case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
Tom Stellard80942a12014-09-05 14:07:59 +00002683 case AMDGPU::S_SUB_I32:
2684 case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
Matt Arsenault43b8e4e2013-11-18 20:09:29 +00002685 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
Matt Arsenault869cd072014-09-03 23:24:35 +00002686 case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
Matt Arsenault124384f2016-09-09 23:32:53 +00002687 case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
2688 case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
2689 case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
2690 case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
2691 case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
2692 case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
2693 case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
Tom Stellard82166022013-11-13 23:36:37 +00002694 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
2695 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
2696 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
2697 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
2698 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
2699 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
Matt Arsenault27cc9582014-04-18 01:53:18 +00002700 case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
2701 case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
Matt Arsenault78b86702014-04-18 05:19:26 +00002702 case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
2703 case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
Marek Olsak63a7b082015-03-24 13:40:21 +00002704 case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
Matt Arsenault43160e72014-06-18 17:13:57 +00002705 case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
Matt Arsenault2c335622014-04-09 07:16:16 +00002706 case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
Matt Arsenault689f3252014-06-09 16:36:31 +00002707 case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
Matt Arsenault0cb92e12014-04-11 19:25:18 +00002708 case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
2709 case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
2710 case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
2711 case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
2712 case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
2713 case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
Tom Stellardbc4497b2016-02-12 23:45:29 +00002714 case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
2715 case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
2716 case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
2717 case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
2718 case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
2719 case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
Matt Arsenault7b1dc2c2016-09-17 02:02:19 +00002720 case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
2721 case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
Marek Olsakc5368502015-01-15 18:43:01 +00002722 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
Matt Arsenault295b86e2014-06-17 17:36:27 +00002723 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
Matt Arsenault85796012014-06-17 17:36:24 +00002724 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
Marek Olsakd2af89d2015-03-04 17:33:45 +00002725 case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
Tom Stellardbc4497b2016-02-12 23:45:29 +00002726 case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
2727 case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
Tom Stellard82166022013-11-13 23:36:37 +00002728 }
2729}
2730
2731bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
2732 return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
2733}
2734
2735const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
2736 unsigned OpNo) const {
2737 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2738 const MCInstrDesc &Desc = get(MI.getOpcode());
2739 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
Matt Arsenault102a7042014-12-11 23:37:34 +00002740 Desc.OpInfo[OpNo].RegClass == -1) {
2741 unsigned Reg = MI.getOperand(OpNo).getReg();
2742
2743 if (TargetRegisterInfo::isVirtualRegister(Reg))
2744 return MRI.getRegClass(Reg);
Matt Arsenault11a4d672015-02-13 19:05:03 +00002745 return RI.getPhysRegClass(Reg);
Matt Arsenault102a7042014-12-11 23:37:34 +00002746 }
Tom Stellard82166022013-11-13 23:36:37 +00002747
2748 unsigned RCID = Desc.OpInfo[OpNo].RegClass;
2749 return RI.getRegClass(RCID);
2750}
2751
2752bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
2753 switch (MI.getOpcode()) {
2754 case AMDGPU::COPY:
2755 case AMDGPU::REG_SEQUENCE:
Tom Stellard4f3b04d2014-04-17 21:00:07 +00002756 case AMDGPU::PHI:
Tom Stellarda5687382014-05-15 14:41:55 +00002757 case AMDGPU::INSERT_SUBREG:
Tom Stellard82166022013-11-13 23:36:37 +00002758 return RI.hasVGPRs(getOpRegClass(MI, 0));
2759 default:
2760 return RI.hasVGPRs(getOpRegClass(MI, OpNo));
2761 }
2762}
2763
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002764void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
Tom Stellard82166022013-11-13 23:36:37 +00002765 MachineBasicBlock::iterator I = MI;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002766 MachineBasicBlock *MBB = MI.getParent();
2767 MachineOperand &MO = MI.getOperand(OpIdx);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002768 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002769 unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass;
Tom Stellard82166022013-11-13 23:36:37 +00002770 const TargetRegisterClass *RC = RI.getRegClass(RCID);
2771 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002772 if (MO.isReg())
Tom Stellard82166022013-11-13 23:36:37 +00002773 Opcode = AMDGPU::COPY;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002774 else if (RI.isSGPRClass(RC))
Matt Arsenault671a0052013-11-14 10:08:50 +00002775 Opcode = AMDGPU::S_MOV_B32;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002776
Matt Arsenault3a4d86a2013-11-18 20:09:55 +00002777 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002778 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
Tom Stellard0c93c9e2014-09-05 14:08:01 +00002779 VRC = &AMDGPU::VReg_64RegClass;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002780 else
Tom Stellard45c0b3a2015-01-07 20:59:25 +00002781 VRC = &AMDGPU::VGPR_32RegClass;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002782
Matt Arsenault3a4d86a2013-11-18 20:09:55 +00002783 unsigned Reg = MRI.createVirtualRegister(VRC);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002784 DebugLoc DL = MBB->findDebugLoc(I);
Diana Picus116bbab2017-01-13 09:58:52 +00002785 BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO);
Tom Stellard82166022013-11-13 23:36:37 +00002786 MO.ChangeToRegister(Reg, false);
2787}
2788
Tom Stellard15834092014-03-21 15:51:57 +00002789unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
2790 MachineRegisterInfo &MRI,
2791 MachineOperand &SuperReg,
2792 const TargetRegisterClass *SuperRC,
2793 unsigned SubIdx,
2794 const TargetRegisterClass *SubRC)
2795 const {
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00002796 MachineBasicBlock *MBB = MI->getParent();
2797 DebugLoc DL = MI->getDebugLoc();
Tom Stellard15834092014-03-21 15:51:57 +00002798 unsigned SubReg = MRI.createVirtualRegister(SubRC);
2799
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00002800 if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
2801 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
2802 .addReg(SuperReg.getReg(), 0, SubIdx);
2803 return SubReg;
2804 }
2805
Tom Stellard15834092014-03-21 15:51:57 +00002806 // Just in case the super register is itself a sub-register, copy it to a new
Matt Arsenault08d84942014-06-03 23:06:13 +00002807 // value so we don't need to worry about merging its subreg index with the
2808 // SubIdx passed to this function. The register coalescer should be able to
Tom Stellard15834092014-03-21 15:51:57 +00002809 // eliminate this extra copy.
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00002810 unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
Tom Stellard15834092014-03-21 15:51:57 +00002811
Matt Arsenault7480a0e2014-11-17 21:11:37 +00002812 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
2813 .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
2814
2815 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
2816 .addReg(NewSuperReg, 0, SubIdx);
2817
Tom Stellard15834092014-03-21 15:51:57 +00002818 return SubReg;
2819}
2820
Matt Arsenault248b7b62014-03-24 20:08:09 +00002821MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
2822 MachineBasicBlock::iterator MII,
2823 MachineRegisterInfo &MRI,
2824 MachineOperand &Op,
2825 const TargetRegisterClass *SuperRC,
2826 unsigned SubIdx,
2827 const TargetRegisterClass *SubRC) const {
2828 if (Op.isImm()) {
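    // Illustrative example (added): for Op.getImm() == 0x1234567800000001,
    // sub0 yields 0x00000001 and sub1 yields 0x12345678.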
Matt Arsenault248b7b62014-03-24 20:08:09 +00002829 if (SubIdx == AMDGPU::sub0)
Matt Arsenaultd745c282016-09-08 17:44:36 +00002830 return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm()));
Matt Arsenault248b7b62014-03-24 20:08:09 +00002831 if (SubIdx == AMDGPU::sub1)
Matt Arsenaultd745c282016-09-08 17:44:36 +00002832 return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32));
Matt Arsenault248b7b62014-03-24 20:08:09 +00002833
2834 llvm_unreachable("Unhandled register index for immediate");
2835 }
2836
2837 unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
2838 SubIdx, SubRC);
2839 return MachineOperand::CreateReg(SubReg, false);
2840}
2841
Marek Olsakbe047802014-12-07 12:19:03 +00002842// Change the order of operands from (0, 1, 2) to (0, 2, 1)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002843void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
2844 assert(Inst.getNumExplicitOperands() == 3);
2845 MachineOperand Op1 = Inst.getOperand(1);
2846 Inst.RemoveOperand(1);
2847 Inst.addOperand(Op1);
Marek Olsakbe047802014-12-07 12:19:03 +00002848}
2849
Matt Arsenault856d1922015-12-01 19:57:17 +00002850bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
2851 const MCOperandInfo &OpInfo,
2852 const MachineOperand &MO) const {
2853 if (!MO.isReg())
2854 return false;
2855
2856 unsigned Reg = MO.getReg();
2857 const TargetRegisterClass *RC =
2858 TargetRegisterInfo::isVirtualRegister(Reg) ?
2859 MRI.getRegClass(Reg) :
2860 RI.getPhysRegClass(Reg);
2861
Nicolai Haehnle82fc9622016-01-07 17:10:29 +00002862 const SIRegisterInfo *TRI =
2863 static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
2864 RC = TRI->getSubRegClass(RC, MO.getSubReg());
2865
Matt Arsenault856d1922015-12-01 19:57:17 +00002866 // In order to be legal, the common sub-class must be equal to the
2867 // class of the current operand. For example:
2868 //
Sam Kolton1eeb11b2016-09-09 14:44:04 +00002869 // v_mov_b32 s0 ; Operand defined as vsrc_b32
2870 // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
Matt Arsenault856d1922015-12-01 19:57:17 +00002871 //
2872 // s_sendmsg 0, s0 ; Operand defined as m0reg
2873 // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
2874
2875 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
2876}
2877
2878bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
2879 const MCOperandInfo &OpInfo,
2880 const MachineOperand &MO) const {
2881 if (MO.isReg())
2882 return isLegalRegOperand(MRI, OpInfo, MO);
2883
2884 // Handle non-register types that are treated like immediates.
2885 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
2886 return true;
2887}
2888
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002889bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
Tom Stellard0e975cf2014-08-01 00:32:35 +00002890 const MachineOperand *MO) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002891 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2892 const MCInstrDesc &InstDesc = MI.getDesc();
Tom Stellard0e975cf2014-08-01 00:32:35 +00002893 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
2894 const TargetRegisterClass *DefinedRC =
2895 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
2896 if (!MO)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002897 MO = &MI.getOperand(OpIdx);
Tom Stellard0e975cf2014-08-01 00:32:35 +00002898
Matt Arsenault4bd72362016-12-10 00:39:12 +00002899 if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
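    // Added note: a candidate operand that reads the constant bus is only
    // legal if no other operand already uses the bus with a different SGPR
    // or with a literal/KIMM32 value.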
Matt Arsenaultfcb345f2016-02-11 06:15:39 +00002900
2901 RegSubRegPair SGPRUsed;
2902 if (MO->isReg())
2903 SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
2904
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002905 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002906 if (i == OpIdx)
2907 continue;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002908 const MachineOperand &Op = MI.getOperand(i);
Matt Arsenaultffc82752016-07-05 17:09:01 +00002909 if (Op.isReg()) {
2910 if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
Matt Arsenault4bd72362016-12-10 00:39:12 +00002911 usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
Matt Arsenaultffc82752016-07-05 17:09:01 +00002912 return false;
2913 }
2914 } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002915 return false;
2916 }
2917 }
2918 }
2919
Tom Stellard0e975cf2014-08-01 00:32:35 +00002920 if (MO->isReg()) {
2921 assert(DefinedRC);
Matt Arsenault856d1922015-12-01 19:57:17 +00002922 return isLegalRegOperand(MRI, OpInfo, *MO);
Tom Stellard0e975cf2014-08-01 00:32:35 +00002923 }
2924
Tom Stellard0e975cf2014-08-01 00:32:35 +00002925 // Handle non-register types that are treated like immediates.
Tom Stellardfb77f002015-01-13 22:59:41 +00002926 assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
Tom Stellard0e975cf2014-08-01 00:32:35 +00002927
Matt Arsenault4364fef2014-09-23 18:30:57 +00002928 if (!DefinedRC) {
2929 // This operand expects an immediate.
Tom Stellard0e975cf2014-08-01 00:32:35 +00002930 return true;
Matt Arsenault4364fef2014-09-23 18:30:57 +00002931 }
Tom Stellard0e975cf2014-08-01 00:32:35 +00002932
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002933 return isImmOperandLegal(MI, OpIdx, *MO);
Tom Stellard0e975cf2014-08-01 00:32:35 +00002934}
2935
Matt Arsenault856d1922015-12-01 19:57:17 +00002936void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002937 MachineInstr &MI) const {
2938 unsigned Opc = MI.getOpcode();
Matt Arsenault856d1922015-12-01 19:57:17 +00002939 const MCInstrDesc &InstrDesc = get(Opc);
2940
2941 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002942 MachineOperand &Src1 = MI.getOperand(Src1Idx);
Matt Arsenault856d1922015-12-01 19:57:17 +00002943
2944 // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
2945 // we need to only have one constant bus use.
2946 //
2947 // Note we do not need to worry about literal constants here. They are
2948 // disabled for the operand type for instructions because they will always
2949 // violate the one constant bus use rule.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002950 bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
Matt Arsenault856d1922015-12-01 19:57:17 +00002951 if (HasImplicitSGPR) {
2952 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002953 MachineOperand &Src0 = MI.getOperand(Src0Idx);
Matt Arsenault856d1922015-12-01 19:57:17 +00002954
2955 if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
2956 legalizeOpWithMove(MI, Src0Idx);
2957 }
2958
2959 // VOP2 src0 instructions support all operand types, so we don't need to check
2960 // their legality. If src1 is already legal, we don't need to do anything.
2961 if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
2962 return;
2963
Nicolai Haehnle5dea6452017-04-24 17:17:36 +00002964 // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for
2965 // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane
2966 // select is uniform.
2967 if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
2968 RI.isVGPR(MRI, Src1.getReg())) {
2969 unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
2970 const DebugLoc &DL = MI.getDebugLoc();
2971 BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
2972 .add(Src1);
2973 Src1.ChangeToRegister(Reg, false);
2974 return;
2975 }
2976
Matt Arsenault856d1922015-12-01 19:57:17 +00002977 // We do not use commuteInstruction here because it is too aggressive and will
2978 // commute if it is possible. We only want to commute here if it improves
2979 // legality. This can be called a fairly large number of times so don't waste
2980 // compile time pointlessly swapping and checking legality again.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002981 if (HasImplicitSGPR || !MI.isCommutable()) {
Matt Arsenault856d1922015-12-01 19:57:17 +00002982 legalizeOpWithMove(MI, Src1Idx);
2983 return;
2984 }
2985
2986 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002987 MachineOperand &Src0 = MI.getOperand(Src0Idx);
Matt Arsenault856d1922015-12-01 19:57:17 +00002988
2989 // If src0 can be used as src1, commuting will make the operands legal.
2990 // Otherwise we have to give up and insert a move.
2991 //
2992 // TODO: Other immediate-like operand kinds could be commuted if there was a
2993 // MachineOperand::ChangeTo* for them.
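  // Illustrative example (added): if src1 holds an SGPR and src0 holds a
  // VGPR, swapping them makes src1 a VGPR (always legal) while the SGPR
  // moves to src0, which does accept scalar operands.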
2994 if ((!Src1.isImm() && !Src1.isReg()) ||
2995 !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
2996 legalizeOpWithMove(MI, Src1Idx);
2997 return;
2998 }
2999
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003000 int CommutedOpc = commuteOpcode(MI);
Matt Arsenault856d1922015-12-01 19:57:17 +00003001 if (CommutedOpc == -1) {
3002 legalizeOpWithMove(MI, Src1Idx);
3003 return;
3004 }
3005
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003006 MI.setDesc(get(CommutedOpc));
Matt Arsenault856d1922015-12-01 19:57:17 +00003007
3008 unsigned Src0Reg = Src0.getReg();
3009 unsigned Src0SubReg = Src0.getSubReg();
3010 bool Src0Kill = Src0.isKill();
3011
3012 if (Src1.isImm())
3013 Src0.ChangeToImmediate(Src1.getImm());
3014 else if (Src1.isReg()) {
3015 Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
3016 Src0.setSubReg(Src1.getSubReg());
3017 } else
3018 llvm_unreachable("Should only have register or immediate operands");
3019
3020 Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
3021 Src1.setSubReg(Src0SubReg);
3022}
3023
Matt Arsenault6005fcb2015-10-21 21:51:02 +00003024// Legalize VOP3 operands. Because all operand types are supported for any
3025// operand, and since literal constants are not allowed and should never be
3026// seen, we only need to worry about inserting copies if we use multiple SGPR
3027// operands.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003028void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
3029 MachineInstr &MI) const {
3030 unsigned Opc = MI.getOpcode();
Matt Arsenault6005fcb2015-10-21 21:51:02 +00003031
3032 int VOP3Idx[3] = {
3033 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
3034 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
3035 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
3036 };
3037
3038 // Find the one SGPR operand we are allowed to use.
3039 unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
3040
3041 for (unsigned i = 0; i < 3; ++i) {
3042 int Idx = VOP3Idx[i];
3043 if (Idx == -1)
3044 break;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003045 MachineOperand &MO = MI.getOperand(Idx);
Matt Arsenault6005fcb2015-10-21 21:51:02 +00003046
3047 // We should never see a VOP3 instruction with an illegal immediate operand.
3048 if (!MO.isReg())
3049 continue;
3050
3051 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
3052 continue; // VGPRs are legal
3053
3054 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
3055 SGPRReg = MO.getReg();
3056 // We can use one SGPR in each VOP3 instruction.
3057 continue;
3058 }
3059
3060 // If we make it this far, then the operand is not legal and we must
3061 // legalize it.
3062 legalizeOpWithMove(MI, Idx);
3063 }
3064}
3065
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003066unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
3067 MachineRegisterInfo &MRI) const {
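  // Added descriptive comment: build an SGPR copy of a (uniform) VGPR value
  // by emitting one V_READFIRSTLANE_B32 per 32-bit sub-register and packing
  // the results with a REG_SEQUENCE.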
Tom Stellard1397d492016-02-11 21:45:07 +00003068 const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
3069 const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
3070 unsigned DstReg = MRI.createVirtualRegister(SRC);
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00003071 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
Tom Stellard1397d492016-02-11 21:45:07 +00003072
3073 SmallVector<unsigned, 8> SRegs;
3074 for (unsigned i = 0; i < SubRegs; ++i) {
3075 unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003076 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
Tom Stellard1397d492016-02-11 21:45:07 +00003077 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003078 .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
Tom Stellard1397d492016-02-11 21:45:07 +00003079 SRegs.push_back(SGPR);
3080 }
3081
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003082 MachineInstrBuilder MIB =
3083 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
3084 get(AMDGPU::REG_SEQUENCE), DstReg);
Tom Stellard1397d492016-02-11 21:45:07 +00003085 for (unsigned i = 0; i < SubRegs; ++i) {
3086 MIB.addReg(SRegs[i]);
3087 MIB.addImm(RI.getSubRegFromChannel(i));
3088 }
3089 return DstReg;
3090}
3091
Tom Stellard467b5b92016-02-20 00:37:25 +00003092void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003093 MachineInstr &MI) const {
Tom Stellard467b5b92016-02-20 00:37:25 +00003094
 3095 // If the pointer is stored in VGPRs, then we need to move it to
 3096 // SGPRs using v_readfirstlane. This is safe because we only select
 3097 // loads with uniform pointers to SMRD instructions, so we know the
 3098 // pointer value is uniform.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003099 MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
Tom Stellard467b5b92016-02-20 00:37:25 +00003100 if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
3101 unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
3102 SBase->setReg(SGPR);
3103 }
3104}
3105
Tom Stellard0d162b12016-11-16 18:42:17 +00003106void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
3107 MachineBasicBlock::iterator I,
3108 const TargetRegisterClass *DstRC,
3109 MachineOperand &Op,
3110 MachineRegisterInfo &MRI,
3111 const DebugLoc &DL) const {
3112
3113 unsigned OpReg = Op.getReg();
3114 unsigned OpSubReg = Op.getSubReg();
3115
3116 const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
3117 RI.getRegClassForReg(MRI, OpReg), OpSubReg);
3118
3119 // Check if operand is already the correct register class.
3120 if (DstRC == OpRC)
3121 return;
3122
3123 unsigned DstReg = MRI.createVirtualRegister(DstRC);
Diana Picus116bbab2017-01-13 09:58:52 +00003124 MachineInstr *Copy =
3125 BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
Tom Stellard0d162b12016-11-16 18:42:17 +00003126
3127 Op.setReg(DstReg);
3128 Op.setSubReg(0);
3129
3130 MachineInstr *Def = MRI.getVRegDef(OpReg);
3131 if (!Def)
3132 return;
3133
3134 // Try to eliminate the copy if it is copying an immediate value.
3135 if (Def->isMoveImmediate())
3136 FoldImmediate(*Copy, *Def, OpReg, &MRI);
3137}
3138
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003139void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
Nicolai Haehnlece2b5892016-11-18 11:55:52 +00003140 MachineFunction &MF = *MI.getParent()->getParent();
3141 MachineRegisterInfo &MRI = MF.getRegInfo();
Tom Stellard82166022013-11-13 23:36:37 +00003142
3143 // Legalize VOP2
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003144 if (isVOP2(MI) || isVOPC(MI)) {
Matt Arsenault856d1922015-12-01 19:57:17 +00003145 legalizeOperandsVOP2(MRI, MI);
Tom Stellard0e975cf2014-08-01 00:32:35 +00003146 return;
Tom Stellard82166022013-11-13 23:36:37 +00003147 }
3148
3149 // Legalize VOP3
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003150 if (isVOP3(MI)) {
Matt Arsenault6005fcb2015-10-21 21:51:02 +00003151 legalizeOperandsVOP3(MRI, MI);
Matt Arsenaulte068f9a2015-09-24 07:51:28 +00003152 return;
Tom Stellard82166022013-11-13 23:36:37 +00003153 }
3154
Tom Stellard467b5b92016-02-20 00:37:25 +00003155 // Legalize SMRD
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003156 if (isSMRD(MI)) {
Tom Stellard467b5b92016-02-20 00:37:25 +00003157 legalizeOperandsSMRD(MRI, MI);
3158 return;
3159 }
3160
Tom Stellard4f3b04d2014-04-17 21:00:07 +00003161 // Legalize REG_SEQUENCE and PHI
Tom Stellard82166022013-11-13 23:36:37 +00003162 // The register class of the operands must be the same type as the register
3163 // class of the output.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003164 if (MI.getOpcode() == AMDGPU::PHI) {
Craig Topper062a2ba2014-04-25 05:30:21 +00003165 const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003166 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
3167 if (!MI.getOperand(i).isReg() ||
3168 !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
Tom Stellard82166022013-11-13 23:36:37 +00003169 continue;
3170 const TargetRegisterClass *OpRC =
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003171 MRI.getRegClass(MI.getOperand(i).getReg());
Tom Stellard82166022013-11-13 23:36:37 +00003172 if (RI.hasVGPRs(OpRC)) {
3173 VRC = OpRC;
3174 } else {
3175 SRC = OpRC;
3176 }
3177 }
3178
 3179 // If any of the operands are VGPR registers, then they all must be VGPRs,
 3180 // otherwise we will create illegal VGPR->SGPR copies when legalizing
 3181 // them.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003182 if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
Tom Stellard82166022013-11-13 23:36:37 +00003183 if (!VRC) {
3184 assert(SRC);
3185 VRC = RI.getEquivalentVGPRClass(SRC);
3186 }
3187 RC = VRC;
3188 } else {
3189 RC = SRC;
3190 }
3191
3192 // Update all the operands so they have the same type.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003193 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3194 MachineOperand &Op = MI.getOperand(I);
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003195 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
Tom Stellard82166022013-11-13 23:36:37 +00003196 continue;
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003197
3198 // MI is a PHI instruction.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003199 MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003200 MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
3201
Tom Stellard0d162b12016-11-16 18:42:17 +00003202 // Avoid creating no-op copies with the same src and dst reg class. These
3203 // confuse some of the machine passes.
3204 legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc());
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003205 }
3206 }
3207
3208 // REG_SEQUENCE doesn't really require operand legalization, but if one has a
3209 // VGPR dest type and SGPR sources, insert copies so all operands are
3210 // VGPRs. This seems to help operand folding / the register coalescer.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003211 if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
3212 MachineBasicBlock *MBB = MI.getParent();
3213 const TargetRegisterClass *DstRC = getOpRegClass(MI, 0);
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003214 if (RI.hasVGPRs(DstRC)) {
3215 // Update all the operands so they are VGPR register classes. These may
3216 // not be the same register class because REG_SEQUENCE supports mixing
3217 // subregister index types e.g. sub0_sub1 + sub2 + sub3
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003218 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3219 MachineOperand &Op = MI.getOperand(I);
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003220 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
3221 continue;
3222
3223 const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
3224 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
3225 if (VRC == OpRC)
3226 continue;
3227
Tom Stellard0d162b12016-11-16 18:42:17 +00003228 legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc());
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003229 Op.setIsKill();
Tom Stellard4f3b04d2014-04-17 21:00:07 +00003230 }
Tom Stellard82166022013-11-13 23:36:37 +00003231 }
Matt Arsenaulte068f9a2015-09-24 07:51:28 +00003232
3233 return;
Tom Stellard82166022013-11-13 23:36:37 +00003234 }
Tom Stellard15834092014-03-21 15:51:57 +00003235
Tom Stellarda5687382014-05-15 14:41:55 +00003236 // Legalize INSERT_SUBREG
3237 // src0 must have the same register class as dst
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003238 if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
3239 unsigned Dst = MI.getOperand(0).getReg();
3240 unsigned Src0 = MI.getOperand(1).getReg();
Tom Stellarda5687382014-05-15 14:41:55 +00003241 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
3242 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
3243 if (DstRC != Src0RC) {
Tom Stellard0d162b12016-11-16 18:42:17 +00003244 MachineBasicBlock *MBB = MI.getParent();
3245 MachineOperand &Op = MI.getOperand(1);
3246 legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc());
Tom Stellarda5687382014-05-15 14:41:55 +00003247 }
3248 return;
3249 }
3250
Nicolai Haehnlece2b5892016-11-18 11:55:52 +00003251 // Legalize MIMG and MUBUF/MTBUF for shaders.
3252 //
3253 // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
3254 // scratch memory access. In both cases, the legalization never involves
3255 // conversion to the addr64 form.
3256 if (isMIMG(MI) ||
3257 (AMDGPU::isShader(MF.getFunction()->getCallingConv()) &&
3258 (isMUBUF(MI) || isMTBUF(MI)))) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003259 MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
Tom Stellard1397d492016-02-11 21:45:07 +00003260 if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
3261 unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
3262 SRsrc->setReg(SGPR);
3263 }
3264
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003265 MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
Tom Stellard1397d492016-02-11 21:45:07 +00003266 if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
3267 unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
3268 SSamp->setReg(SGPR);
3269 }
3270 return;
3271 }
3272
Nicolai Haehnlece2b5892016-11-18 11:55:52 +00003273 // Legalize MUBUF* instructions by converting to addr64 form.
Tom Stellard15834092014-03-21 15:51:57 +00003274 // FIXME: If we start using the non-addr64 instructions for compute, we
Nicolai Haehnlece2b5892016-11-18 11:55:52 +00003275 // may need to legalize them as above. This especially applies to the
3276 // buffer_load_format_* variants and variants with idxen (or bothen).
Tom Stellard155bbb72014-08-11 22:18:17 +00003277 int SRsrcIdx =
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003278 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
Tom Stellard155bbb72014-08-11 22:18:17 +00003279 if (SRsrcIdx != -1) {
3280 // We have an MUBUF instruction
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003281 MachineOperand *SRsrc = &MI.getOperand(SRsrcIdx);
3282 unsigned SRsrcRC = get(MI.getOpcode()).OpInfo[SRsrcIdx].RegClass;
Tom Stellard155bbb72014-08-11 22:18:17 +00003283 if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
3284 RI.getRegClass(SRsrcRC))) {
3285 // The operands are legal.
 3286 // FIXME: We may need to legalize operands besides srsrc.
3287 return;
3288 }
Tom Stellard15834092014-03-21 15:51:57 +00003289
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003290 MachineBasicBlock &MBB = *MI.getParent();
Matt Arsenaultef67d762015-09-09 17:03:29 +00003291
Eric Christopher572e03a2015-06-19 01:53:21 +00003292 // Extract the ptr from the resource descriptor.
Matt Arsenaultef67d762015-09-09 17:03:29 +00003293 unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
3294 &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
Tom Stellard15834092014-03-21 15:51:57 +00003295
Tom Stellard155bbb72014-08-11 22:18:17 +00003296 // Create an empty resource descriptor
3297 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
3298 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3299 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3300 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
Tom Stellard794c8c02014-12-02 17:05:41 +00003301 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
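      // Added note: the replacement descriptor keeps only the default
      // resource data format in its upper dwords; the 64-bit base address is
      // zeroed because the extracted pointer is folded into VAddr below.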
Tom Stellard15834092014-03-21 15:51:57 +00003302
Tom Stellard155bbb72014-08-11 22:18:17 +00003303 // Zero64 = 0
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003304 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B64), Zero64)
3305 .addImm(0);
Tom Stellard15834092014-03-21 15:51:57 +00003306
Tom Stellard155bbb72014-08-11 22:18:17 +00003307 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003308 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
3309 .addImm(RsrcDataFormat & 0xFFFFFFFF);
Tom Stellard15834092014-03-21 15:51:57 +00003310
Tom Stellard155bbb72014-08-11 22:18:17 +00003311 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003312 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
3313 .addImm(RsrcDataFormat >> 32);
Tom Stellard15834092014-03-21 15:51:57 +00003314
Tom Stellard155bbb72014-08-11 22:18:17 +00003315 // NewSRsrc = {Zero64, SRsrcFormat}
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003316 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
3317 .addReg(Zero64)
3318 .addImm(AMDGPU::sub0_sub1)
3319 .addReg(SRsrcFormatLo)
3320 .addImm(AMDGPU::sub2)
3321 .addReg(SRsrcFormatHi)
3322 .addImm(AMDGPU::sub3);
Tom Stellard155bbb72014-08-11 22:18:17 +00003323
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003324 MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
Tom Stellard155bbb72014-08-11 22:18:17 +00003325 unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
Tom Stellard155bbb72014-08-11 22:18:17 +00003326 if (VAddr) {
3327 // This is already an ADDR64 instruction so we need to add the pointer
3328 // extracted from the resource descriptor to the current value of VAddr.
Matt Arsenaultef67d762015-09-09 17:03:29 +00003329 unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3330 unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Tom Stellard155bbb72014-08-11 22:18:17 +00003331
Matt Arsenaultef67d762015-09-09 17:03:29 +00003332 // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003333 DebugLoc DL = MI.getDebugLoc();
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00003334 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
Matt Arsenaultef67d762015-09-09 17:03:29 +00003335 .addReg(SRsrcPtr, 0, AMDGPU::sub0)
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00003336 .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
Tom Stellard15834092014-03-21 15:51:57 +00003337
Matt Arsenaultef67d762015-09-09 17:03:29 +00003338 // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00003339 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
Matt Arsenaultef67d762015-09-09 17:03:29 +00003340 .addReg(SRsrcPtr, 0, AMDGPU::sub1)
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00003341 .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
Tom Stellard15834092014-03-21 15:51:57 +00003342
Matt Arsenaultef67d762015-09-09 17:03:29 +00003343 // NewVaddr = {NewVaddrHi, NewVaddrLo}
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003344 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
3345 .addReg(NewVAddrLo)
3346 .addImm(AMDGPU::sub0)
3347 .addReg(NewVAddrHi)
3348 .addImm(AMDGPU::sub1);
Tom Stellard155bbb72014-08-11 22:18:17 +00003349 } else {
 3350 // This instruction is the _OFFSET variant, so we need to convert it to
3351 // ADDR64.
Matt Arsenault43e92fe2016-06-24 06:30:11 +00003352 assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration()
3353 < SISubtarget::VOLCANIC_ISLANDS &&
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003354 "FIXME: Need to emit flat atomics here");
3355
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003356 MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
3357 MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
3358 MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
3359 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode());
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003360
3361 // Atomics rith return have have an additional tied operand and are
3362 // missing some of the special bits.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003363 MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in);
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003364 MachineInstr *Addr64;
3365
3366 if (!VDataIn) {
3367 // Regular buffer load / store.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003368 MachineInstrBuilder MIB =
3369 BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
Diana Picus116bbab2017-01-13 09:58:52 +00003370 .add(*VData)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003371 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
3372 // This will be replaced later
3373 // with the new value of vaddr.
Diana Picus116bbab2017-01-13 09:58:52 +00003374 .add(*SRsrc)
3375 .add(*SOffset)
3376 .add(*Offset);
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003377
3378 // Atomics do not have this operand.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003379 if (const MachineOperand *GLC =
3380 getNamedOperand(MI, AMDGPU::OpName::glc)) {
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003381 MIB.addImm(GLC->getImm());
3382 }
3383
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003384 MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003385
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003386 if (const MachineOperand *TFE =
3387 getNamedOperand(MI, AMDGPU::OpName::tfe)) {
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003388 MIB.addImm(TFE->getImm());
3389 }
3390
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003391 MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003392 Addr64 = MIB;
3393 } else {
3394 // Atomics with return.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003395 Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
Diana Picus116bbab2017-01-13 09:58:52 +00003396 .add(*VData)
3397 .add(*VDataIn)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003398 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
3399 // This will be replaced later
3400 // with the new value of vaddr.
Diana Picus116bbab2017-01-13 09:58:52 +00003401 .add(*SRsrc)
3402 .add(*SOffset)
3403 .add(*Offset)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003404 .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
3405 .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003406 }
Tom Stellard15834092014-03-21 15:51:57 +00003407
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003408 MI.removeFromParent();
Tom Stellard15834092014-03-21 15:51:57 +00003409
Matt Arsenaultef67d762015-09-09 17:03:29 +00003410 // NewVaddr = {NewVaddrHi, NewVaddrLo}
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003411 BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
3412 NewVAddr)
3413 .addReg(SRsrcPtr, 0, AMDGPU::sub0)
3414 .addImm(AMDGPU::sub0)
3415 .addReg(SRsrcPtr, 0, AMDGPU::sub1)
3416 .addImm(AMDGPU::sub1);
Matt Arsenaultef67d762015-09-09 17:03:29 +00003417
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003418 VAddr = getNamedOperand(*Addr64, AMDGPU::OpName::vaddr);
3419 SRsrc = getNamedOperand(*Addr64, AMDGPU::OpName::srsrc);
Tom Stellard15834092014-03-21 15:51:57 +00003420 }
Tom Stellard155bbb72014-08-11 22:18:17 +00003421
Tom Stellard155bbb72014-08-11 22:18:17 +00003422 // Update the instruction to use NewVaddr
3423 VAddr->setReg(NewVAddr);
3424 // Update the instruction to use NewSRsrc
3425 SRsrc->setReg(NewSRsrc);
Tom Stellard15834092014-03-21 15:51:57 +00003426 }
Tom Stellard82166022013-11-13 23:36:37 +00003427}
3428
3429void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
Alfred Huang5b270722017-07-14 17:56:55 +00003430 SetVectorType Worklist;
3431 Worklist.insert(&TopInst);
Tom Stellard82166022013-11-13 23:36:37 +00003432
3433 while (!Worklist.empty()) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003434 MachineInstr &Inst = *Worklist.pop_back_val();
3435 MachineBasicBlock *MBB = Inst.getParent();
Tom Stellarde0387202014-03-21 15:51:54 +00003436 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
3437
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003438 unsigned Opcode = Inst.getOpcode();
3439 unsigned NewOpcode = getVALUOp(Inst);
Matt Arsenault27cc9582014-04-18 01:53:18 +00003440
Tom Stellarde0387202014-03-21 15:51:54 +00003441 // Handle some special cases
Matt Arsenault27cc9582014-04-18 01:53:18 +00003442 switch (Opcode) {
Tom Stellard0c354f22014-04-30 15:31:29 +00003443 default:
Tom Stellard0c354f22014-04-30 15:31:29 +00003444 break;
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003445 case AMDGPU::S_AND_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00003446 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003447 Inst.eraseFromParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003448 continue;
3449
3450 case AMDGPU::S_OR_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00003451 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003452 Inst.eraseFromParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003453 continue;
3454
3455 case AMDGPU::S_XOR_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00003456 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003457 Inst.eraseFromParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003458 continue;
3459
3460 case AMDGPU::S_NOT_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00003461 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003462 Inst.eraseFromParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003463 continue;
3464
Matt Arsenault8333e432014-06-10 19:18:24 +00003465 case AMDGPU::S_BCNT1_I32_B64:
3466 splitScalar64BitBCNT(Worklist, Inst);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003467 Inst.eraseFromParent();
Matt Arsenault8333e432014-06-10 19:18:24 +00003468 continue;
3469
Matt Arsenault94812212014-11-14 18:18:16 +00003470 case AMDGPU::S_BFE_I64: {
3471 splitScalar64BitBFE(Worklist, Inst);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003472 Inst.eraseFromParent();
Matt Arsenault94812212014-11-14 18:18:16 +00003473 continue;
3474 }
3475
Marek Olsakbe047802014-12-07 12:19:03 +00003476 case AMDGPU::S_LSHL_B32:
Matt Arsenault43e92fe2016-06-24 06:30:11 +00003477 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
Marek Olsakbe047802014-12-07 12:19:03 +00003478 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
3479 swapOperands(Inst);
3480 }
3481 break;
3482 case AMDGPU::S_ASHR_I32:
Matt Arsenault43e92fe2016-06-24 06:30:11 +00003483 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
Marek Olsakbe047802014-12-07 12:19:03 +00003484 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
3485 swapOperands(Inst);
3486 }
3487 break;
3488 case AMDGPU::S_LSHR_B32:
Matt Arsenault43e92fe2016-06-24 06:30:11 +00003489 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
Marek Olsakbe047802014-12-07 12:19:03 +00003490 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
3491 swapOperands(Inst);
3492 }
3493 break;
Marek Olsak707a6d02015-02-03 21:53:01 +00003494 case AMDGPU::S_LSHL_B64:
Matt Arsenault43e92fe2016-06-24 06:30:11 +00003495 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
Marek Olsak707a6d02015-02-03 21:53:01 +00003496 NewOpcode = AMDGPU::V_LSHLREV_B64;
3497 swapOperands(Inst);
3498 }
3499 break;
3500 case AMDGPU::S_ASHR_I64:
Matt Arsenault43e92fe2016-06-24 06:30:11 +00003501 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
Marek Olsak707a6d02015-02-03 21:53:01 +00003502 NewOpcode = AMDGPU::V_ASHRREV_I64;
3503 swapOperands(Inst);
3504 }
3505 break;
3506 case AMDGPU::S_LSHR_B64:
Matt Arsenault43e92fe2016-06-24 06:30:11 +00003507 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
Marek Olsak707a6d02015-02-03 21:53:01 +00003508 NewOpcode = AMDGPU::V_LSHRREV_B64;
3509 swapOperands(Inst);
3510 }
3511 break;
Marek Olsakbe047802014-12-07 12:19:03 +00003512
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00003513 case AMDGPU::S_ABS_I32:
3514 lowerScalarAbs(Worklist, Inst);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003515 Inst.eraseFromParent();
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00003516 continue;
3517
Tom Stellardbc4497b2016-02-12 23:45:29 +00003518 case AMDGPU::S_CBRANCH_SCC0:
3519 case AMDGPU::S_CBRANCH_SCC1:
3520 // Clear unused bits of vcc
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003521 BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
3522 AMDGPU::VCC)
3523 .addReg(AMDGPU::EXEC)
3524 .addReg(AMDGPU::VCC);
Tom Stellardbc4497b2016-02-12 23:45:29 +00003525 break;
3526
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003527 case AMDGPU::S_BFE_U64:
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003528 case AMDGPU::S_BFM_B64:
3529 llvm_unreachable("Moving this op to VALU not implemented");
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003530
3531 case AMDGPU::S_PACK_LL_B32_B16:
3532 case AMDGPU::S_PACK_LH_B32_B16:
3533 case AMDGPU::S_PACK_HH_B32_B16: {
3534 movePackToVALU(Worklist, MRI, Inst);
3535 Inst.eraseFromParent();
3536 continue;
3537 }
Tom Stellarde0387202014-03-21 15:51:54 +00003538 }
3539
Tom Stellard15834092014-03-21 15:51:57 +00003540 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
3541 // We cannot move this instruction to the VALU, so we should try to
3542 // legalize its operands instead.
3543 legalizeOperands(Inst);
Tom Stellard82166022013-11-13 23:36:37 +00003544 continue;
Tom Stellard15834092014-03-21 15:51:57 +00003545 }
Tom Stellard82166022013-11-13 23:36:37 +00003546
Tom Stellard82166022013-11-13 23:36:37 +00003547 // Use the new VALU Opcode.
3548 const MCInstrDesc &NewDesc = get(NewOpcode);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003549 Inst.setDesc(NewDesc);
Tom Stellard82166022013-11-13 23:36:37 +00003550
Matt Arsenaultf0b1e3a2013-11-18 20:09:21 +00003551 // Remove any references to SCC. Vector instructions can't read from it, and
 3552 // we're just about to add the implicit use / defs of VCC, and we don't want
3553 // both.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003554 for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) {
3555 MachineOperand &Op = Inst.getOperand(i);
Tom Stellardbc4497b2016-02-12 23:45:29 +00003556 if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003557 Inst.RemoveOperand(i);
Tom Stellardbc4497b2016-02-12 23:45:29 +00003558 addSCCDefUsersToVALUWorklist(Inst, Worklist);
3559 }
Matt Arsenaultf0b1e3a2013-11-18 20:09:21 +00003560 }
3561
Matt Arsenault27cc9582014-04-18 01:53:18 +00003562 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
3563 // We are converting these to a BFE, so we need to add the missing
3564 // operands for the size and offset.
3565 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003566 Inst.addOperand(MachineOperand::CreateImm(0));
3567 Inst.addOperand(MachineOperand::CreateImm(Size));
Matt Arsenault27cc9582014-04-18 01:53:18 +00003568
Matt Arsenaultb5b51102014-06-10 19:18:21 +00003569 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
3570 // The VALU version adds the second operand to the result, so insert an
3571 // extra 0 operand.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003572 Inst.addOperand(MachineOperand::CreateImm(0));
Tom Stellard82166022013-11-13 23:36:37 +00003573 }
3574
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003575 Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent());
Tom Stellard82166022013-11-13 23:36:37 +00003576
Matt Arsenault78b86702014-04-18 05:19:26 +00003577 if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003578 const MachineOperand &OffsetWidthOp = Inst.getOperand(2);
Matt Arsenault78b86702014-04-18 05:19:26 +00003579 // If we need to move this to VGPRs, we need to unpack the second operand
3580 // back into the 2 separate ones for bit offset and width.
3581 assert(OffsetWidthOp.isImm() &&
3582 "Scalar BFE is only implemented for constant width and offset");
3583 uint32_t Imm = OffsetWidthOp.getImm();
3584
3585 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
3586 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
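    // Worked example (hypothetical immediate, for illustration only): an
    // S_BFE_U32 immediate of 0x100008 decodes with the masks above to
    // Offset = 8 and BitWidth = 16.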
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003587 Inst.RemoveOperand(2); // Remove old immediate.
3588 Inst.addOperand(MachineOperand::CreateImm(Offset));
3589 Inst.addOperand(MachineOperand::CreateImm(BitWidth));
Matt Arsenault78b86702014-04-18 05:19:26 +00003590 }
3591
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003592 bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
Tom Stellardbc4497b2016-02-12 23:45:29 +00003593 unsigned NewDstReg = AMDGPU::NoRegister;
3594 if (HasDst) {
Matt Arsenault21a43822017-04-06 21:09:53 +00003595 unsigned DstReg = Inst.getOperand(0).getReg();
3596 if (TargetRegisterInfo::isPhysicalRegister(DstReg))
3597 continue;
3598
Tom Stellardbc4497b2016-02-12 23:45:29 +00003599 // Update the destination register class.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003600 const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
Tom Stellardbc4497b2016-02-12 23:45:29 +00003601 if (!NewDstRC)
3602 continue;
Tom Stellard82166022013-11-13 23:36:37 +00003603
Tom Stellard0d162b12016-11-16 18:42:17 +00003604 if (Inst.isCopy() &&
3605 TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
3606 NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
3607 // Instead of creating a copy where src and dst are the same register
3608 // class, we just replace all uses of dst with src. These kinds of
3609 // copies interfere with the heuristics MachineSink uses to decide
 3610 // whether or not to split a critical edge, since the pass assumes
 3611 // that copies will end up as machine instructions and not be
 3612 // eliminated.
3613 addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
3614 MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
3615 MRI.clearKillFlags(Inst.getOperand(1).getReg());
3616 Inst.getOperand(0).setReg(DstReg);
3617 continue;
3618 }
3619
Tom Stellardbc4497b2016-02-12 23:45:29 +00003620 NewDstReg = MRI.createVirtualRegister(NewDstRC);
3621 MRI.replaceRegWith(DstReg, NewDstReg);
3622 }
Tom Stellard82166022013-11-13 23:36:37 +00003623
Tom Stellarde1a24452014-04-17 21:00:01 +00003624 // Legalize the operands
3625 legalizeOperands(Inst);
3626
Tom Stellardbc4497b2016-02-12 23:45:29 +00003627 if (HasDst)
3628 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
Tom Stellard82166022013-11-13 23:36:37 +00003629 }
3630}
3631
Alfred Huang5b270722017-07-14 17:56:55 +00003632void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003633 MachineInstr &Inst) const {
3634 MachineBasicBlock &MBB = *Inst.getParent();
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00003635 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3636 MachineBasicBlock::iterator MII = Inst;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003637 DebugLoc DL = Inst.getDebugLoc();
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00003638
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003639 MachineOperand &Dest = Inst.getOperand(0);
3640 MachineOperand &Src = Inst.getOperand(1);
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00003641 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3642 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3643
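  // Expand the scalar abs as max(x, 0 - x). For example (illustrative values
  // only), x = -5 gives TmpReg = 5 and the V_MAX_I32 below then selects 5.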
3644 BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
3645 .addImm(0)
3646 .addReg(Src.getReg());
3647
3648 BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
3649 .addReg(Src.getReg())
3650 .addReg(TmpReg);
3651
3652 MRI.replaceRegWith(Dest.getReg(), ResultReg);
3653 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
3654}
3655
Matt Arsenault689f3252014-06-09 16:36:31 +00003656void SIInstrInfo::splitScalar64BitUnaryOp(
Alfred Huang5b270722017-07-14 17:56:55 +00003657 SetVectorType &Worklist, MachineInstr &Inst,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003658 unsigned Opcode) const {
3659 MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenault689f3252014-06-09 16:36:31 +00003660 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3661
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003662 MachineOperand &Dest = Inst.getOperand(0);
3663 MachineOperand &Src0 = Inst.getOperand(1);
3664 DebugLoc DL = Inst.getDebugLoc();
Matt Arsenault689f3252014-06-09 16:36:31 +00003665
3666 MachineBasicBlock::iterator MII = Inst;
3667
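  // Split the 64-bit source into its sub0/sub1 halves, apply the 32-bit VALU
  // opcode to each half independently, and recombine the halves with a
  // REG_SEQUENCE. This is only valid for opcodes whose 64-bit result is just
  // the concatenation of the per-half results (e.g. a 64-bit bitwise NOT).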
3668 const MCInstrDesc &InstDesc = get(Opcode);
3669 const TargetRegisterClass *Src0RC = Src0.isReg() ?
3670 MRI.getRegClass(Src0.getReg()) :
3671 &AMDGPU::SGPR_32RegClass;
3672
3673 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
3674
3675 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3676 AMDGPU::sub0, Src0SubRC);
3677
3678 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
Matt Arsenaultf003c382015-08-26 20:47:50 +00003679 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
3680 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
Matt Arsenault689f3252014-06-09 16:36:31 +00003681
Matt Arsenaultf003c382015-08-26 20:47:50 +00003682 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
Diana Picus116bbab2017-01-13 09:58:52 +00003683 BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
Matt Arsenault689f3252014-06-09 16:36:31 +00003684
3685 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3686 AMDGPU::sub1, Src0SubRC);
3687
Matt Arsenaultf003c382015-08-26 20:47:50 +00003688 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
Diana Picus116bbab2017-01-13 09:58:52 +00003689 BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
Matt Arsenault689f3252014-06-09 16:36:31 +00003690
Matt Arsenaultf003c382015-08-26 20:47:50 +00003691 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
Matt Arsenault689f3252014-06-09 16:36:31 +00003692 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
3693 .addReg(DestSub0)
3694 .addImm(AMDGPU::sub0)
3695 .addReg(DestSub1)
3696 .addImm(AMDGPU::sub1);
3697
3698 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
3699
Matt Arsenaultf003c382015-08-26 20:47:50 +00003700 // We don't need to call legalizeOperands here because, for a single operand,
 3701 // src0 will support any kind of input.
3702
3703 // Move all users of this moved value.
3704 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
Matt Arsenault689f3252014-06-09 16:36:31 +00003705}
3706
3707void SIInstrInfo::splitScalar64BitBinaryOp(
Alfred Huang5b270722017-07-14 17:56:55 +00003708 SetVectorType &Worklist, MachineInstr &Inst,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003709 unsigned Opcode) const {
3710 MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003711 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3712
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003713 MachineOperand &Dest = Inst.getOperand(0);
3714 MachineOperand &Src0 = Inst.getOperand(1);
3715 MachineOperand &Src1 = Inst.getOperand(2);
3716 DebugLoc DL = Inst.getDebugLoc();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003717
3718 MachineBasicBlock::iterator MII = Inst;
3719
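  // Same idea as the unary split above: both 64-bit sources are split into
  // sub0/sub1 halves, the 32-bit VALU opcode is applied to each pair of
  // halves, and the results are recombined with a REG_SEQUENCE. This assumes
  // the operation acts independently on each 32-bit half (e.g. a 64-bit
  // AND/OR/XOR).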
3720 const MCInstrDesc &InstDesc = get(Opcode);
Matt Arsenault684dc802014-03-24 20:08:13 +00003721 const TargetRegisterClass *Src0RC = Src0.isReg() ?
3722 MRI.getRegClass(Src0.getReg()) :
3723 &AMDGPU::SGPR_32RegClass;
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003724
Matt Arsenault684dc802014-03-24 20:08:13 +00003725 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
3726 const TargetRegisterClass *Src1RC = Src1.isReg() ?
3727 MRI.getRegClass(Src1.getReg()) :
3728 &AMDGPU::SGPR_32RegClass;
3729
3730 const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
3731
3732 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3733 AMDGPU::sub0, Src0SubRC);
3734 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
3735 AMDGPU::sub0, Src1SubRC);
3736
3737 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
Matt Arsenaultf003c382015-08-26 20:47:50 +00003738 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
3739 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
Matt Arsenault684dc802014-03-24 20:08:13 +00003740
Matt Arsenaultf003c382015-08-26 20:47:50 +00003741 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003742 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
Diana Picus116bbab2017-01-13 09:58:52 +00003743 .add(SrcReg0Sub0)
3744 .add(SrcReg1Sub0);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003745
Matt Arsenault684dc802014-03-24 20:08:13 +00003746 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3747 AMDGPU::sub1, Src0SubRC);
3748 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
3749 AMDGPU::sub1, Src1SubRC);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003750
Matt Arsenaultf003c382015-08-26 20:47:50 +00003751 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003752 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
Diana Picus116bbab2017-01-13 09:58:52 +00003753 .add(SrcReg0Sub1)
3754 .add(SrcReg1Sub1);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003755
Matt Arsenaultf003c382015-08-26 20:47:50 +00003756 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003757 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
3758 .addReg(DestSub0)
3759 .addImm(AMDGPU::sub0)
3760 .addReg(DestSub1)
3761 .addImm(AMDGPU::sub1);
3762
3763 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
3764
3765 // Try to legalize the operands in case we need to swap the order to keep it
3766 // valid.
Matt Arsenaultf003c382015-08-26 20:47:50 +00003767 legalizeOperands(LoHalf);
3768 legalizeOperands(HiHalf);
3769
 3770 // Move all users of this moved value.
3771 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003772}
3773
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003774void SIInstrInfo::splitScalar64BitBCNT(
Alfred Huang5b270722017-07-14 17:56:55 +00003775 SetVectorType &Worklist, MachineInstr &Inst) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003776 MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenault8333e432014-06-10 19:18:24 +00003777 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3778
3779 MachineBasicBlock::iterator MII = Inst;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003780 DebugLoc DL = Inst.getDebugLoc();
Matt Arsenault8333e432014-06-10 19:18:24 +00003781
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003782 MachineOperand &Dest = Inst.getOperand(0);
3783 MachineOperand &Src = Inst.getOperand(1);
Matt Arsenault8333e432014-06-10 19:18:24 +00003784
Marek Olsakc5368502015-01-15 18:43:01 +00003785 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
Matt Arsenault8333e432014-06-10 19:18:24 +00003786 const TargetRegisterClass *SrcRC = Src.isReg() ?
3787 MRI.getRegClass(Src.getReg()) :
3788 &AMDGPU::SGPR_32RegClass;
3789
3790 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3791 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3792
3793 const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
3794
3795 MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
3796 AMDGPU::sub0, SrcSubRC);
3797 MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
3798 AMDGPU::sub1, SrcSubRC);
3799
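  // V_BCNT_U32_B32 accumulates into its second source operand, so count the
  // low half first (accumulating into 0) and then add the high half's count
  // on top of it to get the full 64-bit population count.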
Diana Picus116bbab2017-01-13 09:58:52 +00003800 BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0);
Matt Arsenault8333e432014-06-10 19:18:24 +00003801
Diana Picus116bbab2017-01-13 09:58:52 +00003802 BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg);
Matt Arsenault8333e432014-06-10 19:18:24 +00003803
3804 MRI.replaceRegWith(Dest.getReg(), ResultReg);
3805
Matt Arsenault5e7f95e2015-08-26 20:48:04 +00003806 // We don't need to legalize operands here. src0 for either instruction can be
3807 // an SGPR, and the second input is unused or determined here.
3808 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault8333e432014-06-10 19:18:24 +00003809}
3810
Alfred Huang5b270722017-07-14 17:56:55 +00003811void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003812 MachineInstr &Inst) const {
3813 MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenault94812212014-11-14 18:18:16 +00003814 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3815 MachineBasicBlock::iterator MII = Inst;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003816 DebugLoc DL = Inst.getDebugLoc();
Matt Arsenault94812212014-11-14 18:18:16 +00003817
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003818 MachineOperand &Dest = Inst.getOperand(0);
3819 uint32_t Imm = Inst.getOperand(2).getImm();
Matt Arsenault94812212014-11-14 18:18:16 +00003820 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
3821 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
3822
Matt Arsenault6ad34262014-11-14 18:40:49 +00003823 (void) Offset;
3824
Matt Arsenault94812212014-11-14 18:18:16 +00003825 // Only sext_inreg cases handled.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003826 assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 &&
3827 Offset == 0 && "Not implemented");
Matt Arsenault94812212014-11-14 18:18:16 +00003828
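  // For widths narrower than 32 bits the extracted field lives entirely in the
  // low half: V_BFE_I32 sign-extends it within sub0, and sub1 is just the sign
  // replicated (sub0 arithmetically shifted right by 31). For the full 32-bit
  // width, sub0 is passed through unchanged and sub1 is likewise sub0 >> 31.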
3829 if (BitWidth < 32) {
3830 unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3831 unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3832 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
3833
3834 BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003835 .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0)
3836 .addImm(0)
3837 .addImm(BitWidth);
Matt Arsenault94812212014-11-14 18:18:16 +00003838
3839 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
3840 .addImm(31)
3841 .addReg(MidRegLo);
3842
3843 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
3844 .addReg(MidRegLo)
3845 .addImm(AMDGPU::sub0)
3846 .addReg(MidRegHi)
3847 .addImm(AMDGPU::sub1);
3848
3849 MRI.replaceRegWith(Dest.getReg(), ResultReg);
Matt Arsenault445833c2015-08-26 20:47:58 +00003850 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault94812212014-11-14 18:18:16 +00003851 return;
3852 }
3853
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003854 MachineOperand &Src = Inst.getOperand(1);
Matt Arsenault94812212014-11-14 18:18:16 +00003855 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3856 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
3857
3858 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
3859 .addImm(31)
3860 .addReg(Src.getReg(), 0, AMDGPU::sub0);
3861
3862 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
3863 .addReg(Src.getReg(), 0, AMDGPU::sub0)
3864 .addImm(AMDGPU::sub0)
3865 .addReg(TmpReg)
3866 .addImm(AMDGPU::sub1);
3867
3868 MRI.replaceRegWith(Dest.getReg(), ResultReg);
Matt Arsenault445833c2015-08-26 20:47:58 +00003869 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault94812212014-11-14 18:18:16 +00003870}
3871
Matt Arsenaultf003c382015-08-26 20:47:50 +00003872void SIInstrInfo::addUsersToMoveToVALUWorklist(
3873 unsigned DstReg,
3874 MachineRegisterInfo &MRI,
Alfred Huang5b270722017-07-14 17:56:55 +00003875 SetVectorType &Worklist) const {
Matt Arsenaultf003c382015-08-26 20:47:50 +00003876 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
Matt Arsenault4c1e9ec2016-12-20 18:55:06 +00003877 E = MRI.use_end(); I != E;) {
Matt Arsenaultf003c382015-08-26 20:47:50 +00003878 MachineInstr &UseMI = *I->getParent();
3879 if (!canReadVGPR(UseMI, I.getOperandNo())) {
Alfred Huang5b270722017-07-14 17:56:55 +00003880 Worklist.insert(&UseMI);
Matt Arsenault4c1e9ec2016-12-20 18:55:06 +00003881
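      // A single instruction may use DstReg in several operands; skip past all
      // of its uses so it is only inserted into the worklist once.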
3882 do {
3883 ++I;
3884 } while (I != E && I->getParent() == &UseMI);
3885 } else {
3886 ++I;
Matt Arsenaultf003c382015-08-26 20:47:50 +00003887 }
3888 }
3889}
3890
Alfred Huang5b270722017-07-14 17:56:55 +00003891void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003892 MachineRegisterInfo &MRI,
3893 MachineInstr &Inst) const {
3894 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3895 MachineBasicBlock *MBB = Inst.getParent();
3896 MachineOperand &Src0 = Inst.getOperand(1);
3897 MachineOperand &Src1 = Inst.getOperand(2);
3898 const DebugLoc &DL = Inst.getDebugLoc();
3899
3900 switch (Inst.getOpcode()) {
3901 case AMDGPU::S_PACK_LL_B32_B16: {
Konstantin Zhuravlyovd24aeb22017-04-13 23:17:00 +00003902 unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3903 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003904
Konstantin Zhuravlyovd24aeb22017-04-13 23:17:00 +00003905 // FIXME: Can do a lot better if we know the high bits of src0 or src1 are
3906 // 0.
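    // The sequence below computes (Src0 & 0xffff) | (Src1 << 16). With the
    // hypothetical inputs Src0 = 0xAAAA1234 and Src1 = 0xBBBB5678 (illustration
    // only), the result is 0x56781234.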
3907 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
3908 .addImm(0xffff);
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003909
Konstantin Zhuravlyovd24aeb22017-04-13 23:17:00 +00003910 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
3911 .addReg(ImmReg, RegState::Kill)
3912 .add(Src0);
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003913
Konstantin Zhuravlyovd24aeb22017-04-13 23:17:00 +00003914 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
3915 .add(Src1)
3916 .addImm(16)
3917 .addReg(TmpReg, RegState::Kill);
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003918 break;
3919 }
3920 case AMDGPU::S_PACK_LH_B32_B16: {
3921 unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3922 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
3923 .addImm(0xffff);
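    // Assuming the usual bitfield-insert semantics D = (S0 & S1) | (~S0 & S2),
    // V_BFI_B32 with the 0xffff mask yields (Src0 & 0xffff) | (Src1 & 0xffff0000),
    // i.e. the low half of Src0 and the high half of Src1.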
3924 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
3925 .addReg(ImmReg, RegState::Kill)
3926 .add(Src0)
3927 .add(Src1);
3928 break;
3929 }
3930 case AMDGPU::S_PACK_HH_B32_B16: {
3931 unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3932 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3933 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
3934 .addImm(16)
3935 .add(Src0);
3936 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
Konstantin Zhuravlyov88938d42017-04-21 19:35:05 +00003937 .addImm(0xffff0000);
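    // Assuming V_AND_OR_B32 computes (S0 & S1) | S2, the result is
    // (Src1 & 0xffff0000) | (Src0 >> 16), i.e. the high halves of both sources.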
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003938 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg)
3939 .add(Src1)
3940 .addReg(ImmReg, RegState::Kill)
3941 .addReg(TmpReg, RegState::Kill);
3942 break;
3943 }
3944 default:
3945 llvm_unreachable("unhandled s_pack_* instruction");
3946 }
3947
3948 MachineOperand &Dest = Inst.getOperand(0);
3949 MRI.replaceRegWith(Dest.getReg(), ResultReg);
3950 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
3951}
3952
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003953void SIInstrInfo::addSCCDefUsersToVALUWorklist(
Alfred Huang5b270722017-07-14 17:56:55 +00003954 MachineInstr &SCCDefInst, SetVectorType &Worklist) const {
Tom Stellardbc4497b2016-02-12 23:45:29 +00003955 // This assumes that all the users of SCC are in the same block
3956 // as the SCC def.
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +00003957 for (MachineInstr &MI :
3958 llvm::make_range(MachineBasicBlock::iterator(SCCDefInst),
3959 SCCDefInst.getParent()->end())) {
Tom Stellardbc4497b2016-02-12 23:45:29 +00003960 // Exit if we find another SCC def.
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +00003961 if (MI.findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
Tom Stellardbc4497b2016-02-12 23:45:29 +00003962 return;
3963
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +00003964 if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
Alfred Huang5b270722017-07-14 17:56:55 +00003965 Worklist.insert(&MI);
Tom Stellardbc4497b2016-02-12 23:45:29 +00003966 }
3967}
3968
Matt Arsenaultba6aae72015-09-28 20:54:57 +00003969const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
3970 const MachineInstr &Inst) const {
3971 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
3972
3973 switch (Inst.getOpcode()) {
3974 // For target instructions, getOpRegClass just returns the virtual register
3975 // class associated with the operand, so we need to find an equivalent VGPR
3976 // register class in order to move the instruction to the VALU.
3977 case AMDGPU::COPY:
3978 case AMDGPU::PHI:
3979 case AMDGPU::REG_SEQUENCE:
3980 case AMDGPU::INSERT_SUBREG:
Connor Abbott8c217d02017-08-04 18:36:49 +00003981 case AMDGPU::WQM:
Connor Abbott92638ab2017-08-04 18:36:52 +00003982 case AMDGPU::WWM:
Matt Arsenaultba6aae72015-09-28 20:54:57 +00003983 if (RI.hasVGPRs(NewDstRC))
3984 return nullptr;
3985
3986 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
3987 if (!NewDstRC)
3988 return nullptr;
3989 return NewDstRC;
3990 default:
3991 return NewDstRC;
3992 }
3993}
3994
Matt Arsenault6c067412015-11-03 22:30:15 +00003995// Find the one SGPR operand we are allowed to use.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003996unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
Matt Arsenaultee522bf2014-09-26 17:55:06 +00003997 int OpIndices[3]) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003998 const MCInstrDesc &Desc = MI.getDesc();
Matt Arsenaultee522bf2014-09-26 17:55:06 +00003999
4000 // Find the one SGPR operand we are allowed to use.
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00004001 //
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004002 // First we need to consider the instruction's operand requirements before
4003 // legalizing. Some operands are required to be SGPRs, such as implicit uses
4004 // of VCC, but we are still bound by the constant bus requirement to only use
4005 // one.
4006 //
4007 // If the operand's class is an SGPR, we can never move it.
4008
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004009 unsigned SGPRReg = findImplicitSGPRRead(MI);
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00004010 if (SGPRReg != AMDGPU::NoRegister)
4011 return SGPRReg;
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004012
4013 unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004014 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004015
4016 for (unsigned i = 0; i < 3; ++i) {
4017 int Idx = OpIndices[i];
4018 if (Idx == -1)
4019 break;
4020
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004021 const MachineOperand &MO = MI.getOperand(Idx);
Matt Arsenault6c067412015-11-03 22:30:15 +00004022 if (!MO.isReg())
4023 continue;
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004024
Matt Arsenault6c067412015-11-03 22:30:15 +00004025 // Is this operand statically required to be an SGPR based on the operand
4026 // constraints?
4027 const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
4028 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
4029 if (IsRequiredSGPR)
4030 return MO.getReg();
4031
 4032 // If this could be a VGPR or an SGPR, check the dynamic register class.
4033 unsigned Reg = MO.getReg();
4034 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
4035 if (RI.isSGPRClass(RegRC))
4036 UsedSGPRs[i] = Reg;
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004037 }
4038
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004039 // We don't have a required SGPR operand, so we have a bit more freedom in
4040 // selecting operands to move.
4041
4042 // Try to select the most used SGPR. If an SGPR is equal to one of the
4043 // others, we choose that.
4044 //
4045 // e.g.
4046 // V_FMA_F32 v0, s0, s0, s0 -> No moves
4047 // V_FMA_F32 v0, s0, s1, s0 -> Move s1
4048
Matt Arsenault6c067412015-11-03 22:30:15 +00004049 // TODO: If some of the operands are 64-bit SGPRs and some are 32-bit, we should
4050 // prefer those.
4051
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004052 if (UsedSGPRs[0] != AMDGPU::NoRegister) {
4053 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
4054 SGPRReg = UsedSGPRs[0];
4055 }
4056
4057 if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
4058 if (UsedSGPRs[1] == UsedSGPRs[2])
4059 SGPRReg = UsedSGPRs[1];
4060 }
4061
4062 return SGPRReg;
4063}
4064
Tom Stellard6407e1e2014-08-01 00:32:33 +00004065MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
Matt Arsenaultace5b762014-10-17 18:00:43 +00004066 unsigned OperandName) const {
Tom Stellard1aaad692014-07-21 16:55:33 +00004067 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
4068 if (Idx == -1)
4069 return nullptr;
4070
4071 return &MI.getOperand(Idx);
4072}
Tom Stellard794c8c02014-12-02 17:05:41 +00004073
4074uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
4075 uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
Tom Stellard4694ed02015-06-26 21:58:42 +00004076 if (ST.isAmdHsaOS()) {
Marek Olsak5c7a61d2017-03-21 17:00:39 +00004077 // Set ATC = 1. GFX9 doesn't have this bit.
4078 if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS)
4079 RsrcDataFormat |= (1ULL << 56);
Tom Stellard794c8c02014-12-02 17:05:41 +00004080
Marek Olsak5c7a61d2017-03-21 17:00:39 +00004081 // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this.
4082 // BTW, it disables TC L2 and therefore decreases performance.
4083 if (ST.getGeneration() == SISubtarget::VOLCANIC_ISLANDS)
Michel Danzerbeb79ce2016-03-16 09:10:35 +00004084 RsrcDataFormat |= (2ULL << 59);
Tom Stellard4694ed02015-06-26 21:58:42 +00004085 }
4086
Tom Stellard794c8c02014-12-02 17:05:41 +00004087 return RsrcDataFormat;
4088}
Marek Olsakd1a69a22015-09-29 23:37:32 +00004089
4090uint64_t SIInstrInfo::getScratchRsrcWords23() const {
4091 uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
4092 AMDGPU::RSRC_TID_ENABLE |
4093 0xffffffff; // Size;
4094
Marek Olsak5c7a61d2017-03-21 17:00:39 +00004095 // GFX9 doesn't have ELEMENT_SIZE.
4096 if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) {
4097 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
4098 Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
4099 }
Matt Arsenault24ee0782016-02-12 02:40:47 +00004100
Marek Olsak5c7a61d2017-03-21 17:00:39 +00004101 // IndexStride = 64.
4102 Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
Matt Arsenault24ee0782016-02-12 02:40:47 +00004103
Marek Olsakd1a69a22015-09-29 23:37:32 +00004104 // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
4105 // Clear them unless we want a huge stride.
Matt Arsenault43e92fe2016-06-24 06:30:11 +00004106 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
Marek Olsakd1a69a22015-09-29 23:37:32 +00004107 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
4108
4109 return Rsrc23;
4110}
Nicolai Haehnle02c32912016-01-13 16:10:10 +00004111
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004112bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const {
4113 unsigned Opc = MI.getOpcode();
Nicolai Haehnle02c32912016-01-13 16:10:10 +00004114
4115 return isSMRD(Opc);
4116}
4117
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004118bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const {
4119 unsigned Opc = MI.getOpcode();
Nicolai Haehnle02c32912016-01-13 16:10:10 +00004120
4121 return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
4122}
Tom Stellard2ff72622016-01-28 16:04:37 +00004123
Matt Arsenault3354f422016-09-10 01:20:33 +00004124unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
4125 int &FrameIndex) const {
4126 const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
4127 if (!Addr || !Addr->isFI())
4128 return AMDGPU::NoRegister;
4129
4130 assert(!MI.memoperands_empty() &&
Yaxun Liu1a14bfa2017-03-27 14:04:01 +00004131 (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS);
Matt Arsenault3354f422016-09-10 01:20:33 +00004132
4133 FrameIndex = Addr->getIndex();
4134 return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
4135}
4136
4137unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI,
4138 int &FrameIndex) const {
4139 const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr);
4140 assert(Addr && Addr->isFI());
4141 FrameIndex = Addr->getIndex();
4142 return getNamedOperand(MI, AMDGPU::OpName::data)->getReg();
4143}
4144
4145unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
4146 int &FrameIndex) const {
4147
4148 if (!MI.mayLoad())
4149 return AMDGPU::NoRegister;
4150
4151 if (isMUBUF(MI) || isVGPRSpill(MI))
4152 return isStackAccess(MI, FrameIndex);
4153
4154 if (isSGPRSpill(MI))
4155 return isSGPRStackAccess(MI, FrameIndex);
4156
4157 return AMDGPU::NoRegister;
4158}
4159
4160unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
4161 int &FrameIndex) const {
4162 if (!MI.mayStore())
4163 return AMDGPU::NoRegister;
4164
4165 if (isMUBUF(MI) || isVGPRSpill(MI))
4166 return isStackAccess(MI, FrameIndex);
4167
4168 if (isSGPRSpill(MI))
4169 return isSGPRStackAccess(MI, FrameIndex);
4170
4171 return AMDGPU::NoRegister;
4172}
4173
Matt Arsenault02458c22016-06-06 20:10:33 +00004174unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
4175 unsigned Opc = MI.getOpcode();
4176 const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc);
4177 unsigned DescSize = Desc.getSize();
4178
4179 // If we have a definitive size, we can use it. Otherwise we need to inspect
4180 // the operands to know the size.
Matt Arsenault2d8c2892016-11-01 20:42:24 +00004181 //
4182 // FIXME: Instructions that have a base 32-bit encoding report their size as
4183 // 4, even though they are really 8 bytes if they have a literal operand.
4184 if (DescSize != 0 && DescSize != 4)
Matt Arsenault02458c22016-06-06 20:10:33 +00004185 return DescSize;
4186
Matt Arsenault02458c22016-06-06 20:10:33 +00004187 // 4-byte instructions may have a 32-bit literal encoded after them. Check
 4188 // operands that could ever be literals.
4189 if (isVALU(MI) || isSALU(MI)) {
Matt Arsenaultb8f8dbc2017-03-24 19:52:05 +00004190 if (isFixedSize(MI))
Matt Arsenault2d8c2892016-11-01 20:42:24 +00004191 return DescSize;
Matt Arsenault2d8c2892016-11-01 20:42:24 +00004192
Matt Arsenault02458c22016-06-06 20:10:33 +00004193 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4194 if (Src0Idx == -1)
4195 return 4; // No operands.
4196
Matt Arsenault4bd72362016-12-10 00:39:12 +00004197 if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
Matt Arsenault02458c22016-06-06 20:10:33 +00004198 return 8;
4199
4200 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4201 if (Src1Idx == -1)
4202 return 4;
4203
Matt Arsenault4bd72362016-12-10 00:39:12 +00004204 if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
Matt Arsenault02458c22016-06-06 20:10:33 +00004205 return 8;
4206
4207 return 4;
4208 }
4209
Matt Arsenault2d8c2892016-11-01 20:42:24 +00004210 if (DescSize == 4)
4211 return 4;
4212
Matt Arsenault02458c22016-06-06 20:10:33 +00004213 switch (Opc) {
4214 case TargetOpcode::IMPLICIT_DEF:
4215 case TargetOpcode::KILL:
4216 case TargetOpcode::DBG_VALUE:
4217 case TargetOpcode::BUNDLE:
4218 case TargetOpcode::EH_LABEL:
4219 return 0;
4220 case TargetOpcode::INLINEASM: {
4221 const MachineFunction *MF = MI.getParent()->getParent();
4222 const char *AsmStr = MI.getOperand(0).getSymbolName();
4223 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
4224 }
4225 default:
4226 llvm_unreachable("unable to find instruction size");
4227 }
4228}
4229
Tom Stellard6695ba02016-10-28 23:53:48 +00004230bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
4231 if (!isFLAT(MI))
4232 return false;
4233
4234 if (MI.memoperands_empty())
4235 return true;
4236
4237 for (const MachineMemOperand *MMO : MI.memoperands()) {
Yaxun Liu1a14bfa2017-03-27 14:04:01 +00004238 if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS)
Tom Stellard6695ba02016-10-28 23:53:48 +00004239 return true;
4240 }
4241 return false;
4242}
4243
Jan Sjodina06bfe02017-05-15 20:18:37 +00004244bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const {
4245 return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
4246}
4247
4248void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
4249 MachineBasicBlock *IfEnd) const {
4250 MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator();
4251 assert(TI != IfEntry->end());
4252
4253 MachineInstr *Branch = &(*TI);
4254 MachineFunction *MF = IfEntry->getParent();
4255 MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo();
4256
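  // Rewrite the non-uniform branch as an SI_IF at the end of the entry block
  // paired with an SI_END_CF at the start of the merge block, so the divergent
  // condition is handled by EXEC masking rather than a scalar branch.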
4257 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
4258 unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4259 MachineInstr *SIIF =
4260 BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
4261 .add(Branch->getOperand(0))
4262 .add(Branch->getOperand(1));
4263 MachineInstr *SIEND =
4264 BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF))
4265 .addReg(DstReg);
4266
4267 IfEntry->erase(TI);
4268 IfEntry->insert(IfEntry->end(), SIIF);
4269 IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND);
4270 }
4271}
4272
4273void SIInstrInfo::convertNonUniformLoopRegion(
4274 MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const {
4275 MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator();
4276 // We expect 2 terminators, one conditional and one unconditional.
4277 assert(TI != LoopEnd->end());
4278
4279 MachineInstr *Branch = &(*TI);
4280 MachineFunction *MF = LoopEnd->getParent();
4281 MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo();
4282
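  // Build a PHI in the loop header that is zero when entering from outside the
  // loop and carries the accumulated break mask around the back edge; roughly,
  // SI_IF_BREAK folds the exit condition into that mask and SI_LOOP keeps
  // iterating until every lane has broken out.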
4283 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
4284
4285 unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4286 unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4287 MachineInstrBuilder HeaderPHIBuilder =
4288 BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
4289 for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
4290 E = LoopEntry->pred_end();
4291 PI != E; ++PI) {
4292 if (*PI == LoopEnd) {
4293 HeaderPHIBuilder.addReg(BackEdgeReg);
4294 } else {
4295 MachineBasicBlock *PMBB = *PI;
4296 unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4297 materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
4298 ZeroReg, 0);
4299 HeaderPHIBuilder.addReg(ZeroReg);
4300 }
4301 HeaderPHIBuilder.addMBB(*PI);
4302 }
4303 MachineInstr *HeaderPhi = HeaderPHIBuilder;
4304 MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(),
4305 get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
4306 .addReg(DstReg)
4307 .add(Branch->getOperand(0));
4308 MachineInstr *SILOOP =
4309 BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP))
4310 .addReg(BackEdgeReg)
4311 .addMBB(LoopEntry);
4312
4313 LoopEntry->insert(LoopEntry->begin(), HeaderPhi);
4314 LoopEnd->erase(TI);
4315 LoopEnd->insert(LoopEnd->end(), SIIFBREAK);
4316 LoopEnd->insert(LoopEnd->end(), SILOOP);
4317 }
4318}
4319
Tom Stellard2ff72622016-01-28 16:04:37 +00004320ArrayRef<std::pair<int, const char *>>
4321SIInstrInfo::getSerializableTargetIndices() const {
4322 static const std::pair<int, const char *> TargetIndices[] = {
4323 {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
4324 {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
4325 {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
4326 {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
4327 {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
4328 return makeArrayRef(TargetIndices);
4329}
Tom Stellardcb6ba622016-04-30 00:23:06 +00004330
4331/// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The
4332/// post-RA version of misched uses CreateTargetMIHazardRecognizer.
4333ScheduleHazardRecognizer *
4334SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
4335 const ScheduleDAG *DAG) const {
4336 return new GCNHazardRecognizer(DAG->MF);
4337}
4338
4339/// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
4340/// pass.
4341ScheduleHazardRecognizer *
4342SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
4343 return new GCNHazardRecognizer(MF);
4344}
Stanislav Mekhanoshin6ec3e3a2017-01-20 00:44:31 +00004345
Matt Arsenault3f031e72017-07-02 23:21:48 +00004346std::pair<unsigned, unsigned>
4347SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4348 return std::make_pair(TF & MO_MASK, TF & ~MO_MASK);
4349}
4350
4351ArrayRef<std::pair<unsigned, const char *>>
4352SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4353 static const std::pair<unsigned, const char *> TargetFlags[] = {
4354 { MO_GOTPCREL, "amdgpu-gotprel" },
4355 { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" },
4356 { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" },
4357 { MO_REL32_LO, "amdgpu-rel32-lo" },
4358 { MO_REL32_HI, "amdgpu-rel32-hi" }
4359 };
4360
4361 return makeArrayRef(TargetFlags);
4362}
4363
Stanislav Mekhanoshin6ec3e3a2017-01-20 00:44:31 +00004364bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
4365 return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
4366 MI.modifiesRegister(AMDGPU::EXEC, &RI);
4367}
Stanislav Mekhanoshin86b0a542017-04-14 00:33:44 +00004368
4369MachineInstrBuilder
4370SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
4371 MachineBasicBlock::iterator I,
4372 const DebugLoc &DL,
4373 unsigned DestReg) const {
4374 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4375
4376 unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4377
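  // V_ADD_I32_e64 still defines a carry-out; give it a throwaway SGPR pair
  // marked dead, since callers of this helper only want the 32-bit sum.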
4378 return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
4379 .addReg(UnusedCarry, RegState::Define | RegState::Dead);
4380}