blob: f7f6d52e751fb52ff5630b119f86febbbca5cc5a [file] [log] [blame]
Eugene Zelenko59e12822017-08-08 00:47:13 +00001//===- SIInstrInfo.cpp - SI Instruction Information ----------------------===//
Tom Stellard75aadc22012-12-11 21:25:42 +00002//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief SI Implementation of TargetInstrInfo.
12//
13//===----------------------------------------------------------------------===//
14
Tom Stellard75aadc22012-12-11 21:25:42 +000015#include "SIInstrInfo.h"
Eugene Zelenko59e12822017-08-08 00:47:13 +000016#include "AMDGPU.h"
17#include "AMDGPUSubtarget.h"
Tom Stellardcb6ba622016-04-30 00:23:06 +000018#include "GCNHazardRecognizer.h"
Tom Stellard16a9a202013-08-14 23:24:17 +000019#include "SIDefines.h"
Tom Stellardc149dc02013-11-27 21:23:35 +000020#include "SIMachineFunctionInfo.h"
Eugene Zelenko59e12822017-08-08 00:47:13 +000021#include "SIRegisterInfo.h"
22#include "Utils/AMDGPUBaseInfo.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/ADT/StringRef.h"
27#include "llvm/ADT/iterator_range.h"
28#include "llvm/Analysis/AliasAnalysis.h"
29#include "llvm/Analysis/MemoryLocation.h"
30#include "llvm/CodeGen/MachineBasicBlock.h"
Tom Stellardc5cf2f02014-08-21 20:40:54 +000031#include "llvm/CodeGen/MachineFrameInfo.h"
Eugene Zelenko59e12822017-08-08 00:47:13 +000032#include "llvm/CodeGen/MachineFunction.h"
33#include "llvm/CodeGen/MachineInstr.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000034#include "llvm/CodeGen/MachineInstrBuilder.h"
Eugene Zelenko59e12822017-08-08 00:47:13 +000035#include "llvm/CodeGen/MachineInstrBundle.h"
36#include "llvm/CodeGen/MachineMemOperand.h"
37#include "llvm/CodeGen/MachineOperand.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000038#include "llvm/CodeGen/MachineRegisterInfo.h"
Eugene Zelenko59e12822017-08-08 00:47:13 +000039#include "llvm/CodeGen/MachineValueType.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000040#include "llvm/CodeGen/RegisterScavenging.h"
Tom Stellardcb6ba622016-04-30 00:23:06 +000041#include "llvm/CodeGen/ScheduleDAG.h"
Eugene Zelenko59e12822017-08-08 00:47:13 +000042#include "llvm/CodeGen/SelectionDAGNodes.h"
43#include "llvm/IR/DebugLoc.h"
Matt Arsenault21a43822017-04-06 21:09:53 +000044#include "llvm/IR/DiagnosticInfo.h"
Tom Stellard4e07b1d2014-06-10 21:20:41 +000045#include "llvm/IR/Function.h"
Eugene Zelenko59e12822017-08-08 00:47:13 +000046#include "llvm/IR/InlineAsm.h"
47#include "llvm/IR/LLVMContext.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000048#include "llvm/MC/MCInstrDesc.h"
Eugene Zelenko59e12822017-08-08 00:47:13 +000049#include "llvm/Support/Casting.h"
50#include "llvm/Support/CommandLine.h"
51#include "llvm/Support/Compiler.h"
52#include "llvm/Support/ErrorHandling.h"
53#include "llvm/Support/MathExtras.h"
54#include "llvm/Target/TargetMachine.h"
55#include "llvm/Target/TargetOpcodes.h"
56#include "llvm/Target/TargetRegisterInfo.h"
57#include <cassert>
58#include <cstdint>
59#include <iterator>
60#include <utility>
Tom Stellard75aadc22012-12-11 21:25:42 +000061
62using namespace llvm;
63
Matt Arsenault6bc43d82016-10-06 16:20:41 +000064// Must be at least 4 to be able to branch over minimum unconditional branch
65// code. This is only for making it possible to write reasonably small tests for
66// long branches.
67static cl::opt<unsigned>
68BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
69 cl::desc("Restrict range of branch instructions (DEBUG)"));
70
Matt Arsenault43e92fe2016-06-24 06:30:11 +000071SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
Matt Arsenaulte0bf7d02017-02-21 19:12:08 +000072 : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {}
Tom Stellard75aadc22012-12-11 21:25:42 +000073
Tom Stellard82166022013-11-13 23:36:37 +000074//===----------------------------------------------------------------------===//
75// TargetInstrInfo callbacks
76//===----------------------------------------------------------------------===//
77
Matt Arsenaultc10853f2014-08-06 00:29:43 +000078static unsigned getNumOperandsNoGlue(SDNode *Node) {
79 unsigned N = Node->getNumOperands();
80 while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
81 --N;
82 return N;
83}
84
85static SDValue findChainOperand(SDNode *Load) {
86 SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
87 assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
88 return LastOp;
89}
90
Tom Stellard155bbb72014-08-11 22:18:17 +000091/// \brief Returns true if both nodes have the same value for the given
92/// operand \p Op, or if both nodes do not have this operand.
93static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
94 unsigned Opc0 = N0->getMachineOpcode();
95 unsigned Opc1 = N1->getMachineOpcode();
96
97 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
98 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
99
100 if (Op0Idx == -1 && Op1Idx == -1)
101 return true;
102
103
104 if ((Op0Idx == -1 && Op1Idx != -1) ||
105 (Op1Idx == -1 && Op0Idx != -1))
106 return false;
107
108 // getNamedOperandIdx returns the index for the MachineInstr's operands,
109 // which includes the result as the first operand. We are indexing into the
110 // MachineSDNode's operands, so we need to skip the result operand to get
111 // the real index.
112 --Op0Idx;
113 --Op1Idx;
114
Tom Stellardb8b84132014-09-03 15:22:39 +0000115 return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
Tom Stellard155bbb72014-08-11 22:18:17 +0000116}
117
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000118bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
Matt Arsenaulta48b8662015-04-23 23:34:48 +0000119 AliasAnalysis *AA) const {
120 // TODO: The generic check fails for VALU instructions that should be
121 // rematerializable due to implicit reads of exec. We really want all of the
122 // generic logic for this except for this.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000123 switch (MI.getOpcode()) {
Matt Arsenaulta48b8662015-04-23 23:34:48 +0000124 case AMDGPU::V_MOV_B32_e32:
125 case AMDGPU::V_MOV_B32_e64:
Matt Arsenault80f766a2015-09-10 01:23:28 +0000126 case AMDGPU::V_MOV_B64_PSEUDO:
Matt Arsenaulta48b8662015-04-23 23:34:48 +0000127 return true;
128 default:
129 return false;
130 }
131}
132
Matt Arsenaultc10853f2014-08-06 00:29:43 +0000133bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
134 int64_t &Offset0,
135 int64_t &Offset1) const {
136 if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
137 return false;
138
139 unsigned Opc0 = Load0->getMachineOpcode();
140 unsigned Opc1 = Load1->getMachineOpcode();
141
142 // Make sure both are actually loads.
143 if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
144 return false;
145
146 if (isDS(Opc0) && isDS(Opc1)) {
Tom Stellard20fa0be2014-10-07 21:09:20 +0000147
148 // FIXME: Handle this case:
149 if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
150 return false;
Matt Arsenaultc10853f2014-08-06 00:29:43 +0000151
Matt Arsenaultc10853f2014-08-06 00:29:43 +0000152 // Check base reg.
153 if (Load0->getOperand(1) != Load1->getOperand(1))
154 return false;
155
156 // Check chain.
157 if (findChainOperand(Load0) != findChainOperand(Load1))
158 return false;
159
Matt Arsenault972c12a2014-09-17 17:48:32 +0000160 // Skip read2 / write2 variants for simplicity.
161 // TODO: We should report true if the used offsets are adjacent (excluded
162 // st64 versions).
163 if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
164 AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
165 return false;
166
Matt Arsenaultc10853f2014-08-06 00:29:43 +0000167 Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
168 Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
169 return true;
170 }
171
172 if (isSMRD(Opc0) && isSMRD(Opc1)) {
Nicolai Haehnleef449782017-04-24 16:53:52 +0000173 // Skip time and cache invalidation instructions.
174 if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 ||
175 AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1)
176 return false;
177
Matt Arsenaultc10853f2014-08-06 00:29:43 +0000178 assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));
179
180 // Check base reg.
181 if (Load0->getOperand(0) != Load1->getOperand(0))
182 return false;
183
Tom Stellardf0a575f2015-03-23 16:06:01 +0000184 const ConstantSDNode *Load0Offset =
185 dyn_cast<ConstantSDNode>(Load0->getOperand(1));
186 const ConstantSDNode *Load1Offset =
187 dyn_cast<ConstantSDNode>(Load1->getOperand(1));
188
189 if (!Load0Offset || !Load1Offset)
190 return false;
191
Matt Arsenaultc10853f2014-08-06 00:29:43 +0000192 // Check chain.
193 if (findChainOperand(Load0) != findChainOperand(Load1))
194 return false;
195
Tom Stellardf0a575f2015-03-23 16:06:01 +0000196 Offset0 = Load0Offset->getZExtValue();
197 Offset1 = Load1Offset->getZExtValue();
Matt Arsenaultc10853f2014-08-06 00:29:43 +0000198 return true;
199 }
200
201 // MUBUF and MTBUF can access the same addresses.
202 if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
Matt Arsenaultc10853f2014-08-06 00:29:43 +0000203
204 // MUBUF and MTBUF have vaddr at different indices.
Tom Stellard155bbb72014-08-11 22:18:17 +0000205 if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
206 findChainOperand(Load0) != findChainOperand(Load1) ||
207 !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
Tom Stellardb8b84132014-09-03 15:22:39 +0000208 !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
Matt Arsenaultc10853f2014-08-06 00:29:43 +0000209 return false;
210
Tom Stellard155bbb72014-08-11 22:18:17 +0000211 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
212 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
213
214 if (OffIdx0 == -1 || OffIdx1 == -1)
215 return false;
216
217 // getNamedOperandIdx returns the index for MachineInstrs. Since they
218 // inlcude the output in the operand list, but SDNodes don't, we need to
219 // subtract the index by one.
220 --OffIdx0;
221 --OffIdx1;
222
223 SDValue Off0 = Load0->getOperand(OffIdx0);
224 SDValue Off1 = Load1->getOperand(OffIdx1);
225
226 // The offset might be a FrameIndexSDNode.
227 if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
228 return false;
229
230 Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
231 Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
Matt Arsenaultc10853f2014-08-06 00:29:43 +0000232 return true;
233 }
234
235 return false;
236}
237
Matt Arsenault2e991122014-09-10 23:26:16 +0000238static bool isStride64(unsigned Opc) {
239 switch (Opc) {
240 case AMDGPU::DS_READ2ST64_B32:
241 case AMDGPU::DS_READ2ST64_B64:
242 case AMDGPU::DS_WRITE2ST64_B32:
243 case AMDGPU::DS_WRITE2ST64_B64:
244 return true;
245 default:
246 return false;
247 }
248}
249
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000250bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
Chad Rosierc27a18f2016-03-09 16:00:35 +0000251 int64_t &Offset,
Sanjoy Dasb666ea32015-06-15 18:44:14 +0000252 const TargetRegisterInfo *TRI) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000253 unsigned Opc = LdSt.getOpcode();
Matt Arsenault3add6432015-10-20 04:35:43 +0000254
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000255 if (isDS(LdSt)) {
256 const MachineOperand *OffsetImm =
257 getNamedOperand(LdSt, AMDGPU::OpName::offset);
Matt Arsenault7eb0a102014-07-30 01:01:10 +0000258 if (OffsetImm) {
259 // Normal, single offset LDS instruction.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000260 const MachineOperand *AddrReg =
261 getNamedOperand(LdSt, AMDGPU::OpName::addr);
Matt Arsenault1acc72f2014-07-29 21:34:55 +0000262
Matt Arsenault7eb0a102014-07-30 01:01:10 +0000263 BaseReg = AddrReg->getReg();
264 Offset = OffsetImm->getImm();
265 return true;
Matt Arsenault1acc72f2014-07-29 21:34:55 +0000266 }
267
Matt Arsenault7eb0a102014-07-30 01:01:10 +0000268 // The 2 offset instructions use offset0 and offset1 instead. We can treat
269 // these as a load with a single offset if the 2 offsets are consecutive. We
270 // will use this for some partially aligned loads.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000271 const MachineOperand *Offset0Imm =
272 getNamedOperand(LdSt, AMDGPU::OpName::offset0);
273 const MachineOperand *Offset1Imm =
274 getNamedOperand(LdSt, AMDGPU::OpName::offset1);
Matt Arsenault1acc72f2014-07-29 21:34:55 +0000275
Matt Arsenault7eb0a102014-07-30 01:01:10 +0000276 uint8_t Offset0 = Offset0Imm->getImm();
277 uint8_t Offset1 = Offset1Imm->getImm();
Matt Arsenault7eb0a102014-07-30 01:01:10 +0000278
Matt Arsenault84db5d92015-07-14 17:57:36 +0000279 if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
Matt Arsenault7eb0a102014-07-30 01:01:10 +0000280 // Each of these offsets is in element sized units, so we need to convert
281 // to bytes of the individual reads.
282
283 unsigned EltSize;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000284 if (LdSt.mayLoad())
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000285 EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16;
Matt Arsenault7eb0a102014-07-30 01:01:10 +0000286 else {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000287 assert(LdSt.mayStore());
Matt Arsenault7eb0a102014-07-30 01:01:10 +0000288 int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000289 EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8;
Matt Arsenault7eb0a102014-07-30 01:01:10 +0000290 }
291
Matt Arsenault2e991122014-09-10 23:26:16 +0000292 if (isStride64(Opc))
293 EltSize *= 64;
294
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000295 const MachineOperand *AddrReg =
296 getNamedOperand(LdSt, AMDGPU::OpName::addr);
Matt Arsenault7eb0a102014-07-30 01:01:10 +0000297 BaseReg = AddrReg->getReg();
298 Offset = EltSize * Offset0;
299 return true;
300 }
301
302 return false;
Matt Arsenault1acc72f2014-07-29 21:34:55 +0000303 }
304
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000305 if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
Matt Arsenault36666292016-11-15 20:14:27 +0000306 const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
307 if (SOffset && SOffset->isReg())
Matt Arsenault1acc72f2014-07-29 21:34:55 +0000308 return false;
309
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000310 const MachineOperand *AddrReg =
311 getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
Matt Arsenault1acc72f2014-07-29 21:34:55 +0000312 if (!AddrReg)
313 return false;
314
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000315 const MachineOperand *OffsetImm =
316 getNamedOperand(LdSt, AMDGPU::OpName::offset);
Matt Arsenault1acc72f2014-07-29 21:34:55 +0000317 BaseReg = AddrReg->getReg();
318 Offset = OffsetImm->getImm();
Matt Arsenault36666292016-11-15 20:14:27 +0000319
320 if (SOffset) // soffset can be an inline immediate.
321 Offset += SOffset->getImm();
322
Matt Arsenault1acc72f2014-07-29 21:34:55 +0000323 return true;
324 }
325
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000326 if (isSMRD(LdSt)) {
327 const MachineOperand *OffsetImm =
328 getNamedOperand(LdSt, AMDGPU::OpName::offset);
Matt Arsenault1acc72f2014-07-29 21:34:55 +0000329 if (!OffsetImm)
330 return false;
331
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000332 const MachineOperand *SBaseReg =
333 getNamedOperand(LdSt, AMDGPU::OpName::sbase);
Matt Arsenault1acc72f2014-07-29 21:34:55 +0000334 BaseReg = SBaseReg->getReg();
335 Offset = OffsetImm->getImm();
336 return true;
337 }
338
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000339 if (isFLAT(LdSt)) {
Matt Arsenault37a58e02017-07-21 18:06:36 +0000340 const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
341 if (VAddr) {
342 // Can't analyze 2 offsets.
343 if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
344 return false;
345
346 BaseReg = VAddr->getReg();
347 } else {
348 // scratch instructions have either vaddr or saddr.
349 BaseReg = getNamedOperand(LdSt, AMDGPU::OpName::saddr)->getReg();
350 }
351
352 Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
Matt Arsenault43578ec2016-06-02 20:05:20 +0000353 return true;
354 }
355
Matt Arsenault1acc72f2014-07-29 21:34:55 +0000356 return false;
357}
358
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000359bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
360 MachineInstr &SecondLdSt,
Jun Bum Lim4c5bd582016-04-15 14:58:38 +0000361 unsigned NumLoads) const {
NAKAMURA Takumife1202c2016-06-20 00:37:41 +0000362 const MachineOperand *FirstDst = nullptr;
363 const MachineOperand *SecondDst = nullptr;
Tom Stellarda76bcc22016-03-28 16:10:13 +0000364
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000365 if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
Matt Arsenault74f64832017-02-01 20:22:51 +0000366 (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
367 (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000368 FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
Stanislav Mekhanoshin949fac92017-09-06 15:31:30 +0000369 if (!FirstDst)
370 FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000371 SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
Stanislav Mekhanoshin949fac92017-09-06 15:31:30 +0000372 if (!SecondDst)
373 SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
Matt Arsenault437fd712016-11-29 19:30:41 +0000374 } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
375 FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst);
376 SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst);
377 } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) {
378 FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
379 SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
Tom Stellarda76bcc22016-03-28 16:10:13 +0000380 }
381
382 if (!FirstDst || !SecondDst)
Matt Arsenault0e75a062014-09-17 17:48:30 +0000383 return false;
384
Tom Stellarda76bcc22016-03-28 16:10:13 +0000385 // Try to limit clustering based on the total number of bytes loaded
386 // rather than the number of instructions. This is done to help reduce
387 // register pressure. The method used is somewhat inexact, though,
388 // because it assumes that all loads in the cluster will load the
389 // same number of bytes as FirstLdSt.
Matt Arsenault0e75a062014-09-17 17:48:30 +0000390
Tom Stellarda76bcc22016-03-28 16:10:13 +0000391 // The unit of this value is bytes.
392 // FIXME: This needs finer tuning.
393 unsigned LoadClusterThreshold = 16;
Matt Arsenault0e75a062014-09-17 17:48:30 +0000394
Tom Stellarda76bcc22016-03-28 16:10:13 +0000395 const MachineRegisterInfo &MRI =
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000396 FirstLdSt.getParent()->getParent()->getRegInfo();
Tom Stellarda76bcc22016-03-28 16:10:13 +0000397 const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg());
398
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000399 return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
Matt Arsenault0e75a062014-09-17 17:48:30 +0000400}
401
Matt Arsenault21a43822017-04-06 21:09:53 +0000402static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
403 MachineBasicBlock::iterator MI,
404 const DebugLoc &DL, unsigned DestReg,
405 unsigned SrcReg, bool KillSrc) {
406 MachineFunction *MF = MBB.getParent();
407 DiagnosticInfoUnsupported IllegalCopy(*MF->getFunction(),
408 "illegal SGPR to VGPR copy",
409 DL, DS_Error);
410 LLVMContext &C = MF->getFunction()->getContext();
411 C.diagnose(IllegalCopy);
412
413 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
414 .addReg(SrcReg, getKillRegState(KillSrc));
415}
416
Benjamin Kramerbdc49562016-06-12 15:39:02 +0000417void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
418 MachineBasicBlock::iterator MI,
419 const DebugLoc &DL, unsigned DestReg,
420 unsigned SrcReg, bool KillSrc) const {
Matt Arsenault314cbf72016-11-07 16:39:22 +0000421 const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
Christian Konigd0e3da12013-03-01 09:46:27 +0000422
Matt Arsenault314cbf72016-11-07 16:39:22 +0000423 if (RC == &AMDGPU::VGPR_32RegClass) {
424 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
425 AMDGPU::SReg_32RegClass.contains(SrcReg));
426 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
427 .addReg(SrcReg, getKillRegState(KillSrc));
428 return;
429 }
Christian Konigd0e3da12013-03-01 09:46:27 +0000430
Marek Olsak79c05872016-11-25 17:37:09 +0000431 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
432 RC == &AMDGPU::SReg_32RegClass) {
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000433 if (SrcReg == AMDGPU::SCC) {
434 BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
435 .addImm(-1)
436 .addImm(0);
437 return;
438 }
439
Matt Arsenault21a43822017-04-06 21:09:53 +0000440 if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
441 reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
442 return;
443 }
444
Christian Konigd0e3da12013-03-01 09:46:27 +0000445 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
446 .addReg(SrcReg, getKillRegState(KillSrc));
447 return;
Matt Arsenault314cbf72016-11-07 16:39:22 +0000448 }
Christian Konigd0e3da12013-03-01 09:46:27 +0000449
Matt Arsenault314cbf72016-11-07 16:39:22 +0000450 if (RC == &AMDGPU::SReg_64RegClass) {
Matt Arsenault834b1aa2015-02-14 02:55:54 +0000451 if (DestReg == AMDGPU::VCC) {
Matt Arsenault99981682015-02-14 02:55:56 +0000452 if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
453 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
454 .addReg(SrcReg, getKillRegState(KillSrc));
455 } else {
456 // FIXME: Hack until VReg_1 removed.
457 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
Matt Arsenault5d8eb252016-09-30 01:50:20 +0000458 BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
Matt Arsenault99981682015-02-14 02:55:56 +0000459 .addImm(0)
460 .addReg(SrcReg, getKillRegState(KillSrc));
461 }
Matt Arsenault834b1aa2015-02-14 02:55:54 +0000462
Matt Arsenault834b1aa2015-02-14 02:55:54 +0000463 return;
464 }
465
Matt Arsenault21a43822017-04-06 21:09:53 +0000466 if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
467 reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
468 return;
469 }
470
Tom Stellard75aadc22012-12-11 21:25:42 +0000471 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
472 .addReg(SrcReg, getKillRegState(KillSrc));
Christian Konigd0e3da12013-03-01 09:46:27 +0000473 return;
Christian Konigd0e3da12013-03-01 09:46:27 +0000474 }
475
Matt Arsenault314cbf72016-11-07 16:39:22 +0000476 if (DestReg == AMDGPU::SCC) {
477 assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
478 BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
479 .addReg(SrcReg, getKillRegState(KillSrc))
480 .addImm(0);
481 return;
482 }
483
484 unsigned EltSize = 4;
485 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
486 if (RI.isSGPRClass(RC)) {
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000487 if (RI.getRegSizeInBits(*RC) > 32) {
Matt Arsenault314cbf72016-11-07 16:39:22 +0000488 Opcode = AMDGPU::S_MOV_B64;
489 EltSize = 8;
490 } else {
491 Opcode = AMDGPU::S_MOV_B32;
492 EltSize = 4;
493 }
Matt Arsenault21a43822017-04-06 21:09:53 +0000494
495 if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
496 reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
497 return;
498 }
Matt Arsenault314cbf72016-11-07 16:39:22 +0000499 }
500
501 ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
Matt Arsenault73d2f892016-07-15 22:32:02 +0000502 bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
Nicolai Haehnledd587052015-12-19 01:16:06 +0000503
504 for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
505 unsigned SubIdx;
506 if (Forward)
507 SubIdx = SubIndices[Idx];
508 else
509 SubIdx = SubIndices[SubIndices.size() - Idx - 1];
510
Christian Konigd0e3da12013-03-01 09:46:27 +0000511 MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
512 get(Opcode), RI.getSubReg(DestReg, SubIdx));
513
Nicolai Haehnledd587052015-12-19 01:16:06 +0000514 Builder.addReg(RI.getSubReg(SrcReg, SubIdx));
Christian Konigd0e3da12013-03-01 09:46:27 +0000515
Nicolai Haehnledd587052015-12-19 01:16:06 +0000516 if (Idx == 0)
Christian Konigd0e3da12013-03-01 09:46:27 +0000517 Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
Matt Arsenault73d2f892016-07-15 22:32:02 +0000518
Matt Arsenault05c26472017-06-12 17:19:20 +0000519 bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
520 Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
Tom Stellard75aadc22012-12-11 21:25:42 +0000521 }
522}
523
Matt Arsenaultbbb47da2016-09-08 17:19:29 +0000524int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
Christian Konig3c145802013-03-27 09:12:59 +0000525 int NewOpc;
526
527 // Try to map original to commuted opcode
Marek Olsak191507e2015-02-03 17:38:12 +0000528 NewOpc = AMDGPU::getCommuteRev(Opcode);
Marek Olsakcfbdba22015-06-26 20:29:10 +0000529 if (NewOpc != -1)
530 // Check if the commuted (REV) opcode exists on the target.
531 return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
Christian Konig3c145802013-03-27 09:12:59 +0000532
533 // Try to map commuted to original opcode
Marek Olsak191507e2015-02-03 17:38:12 +0000534 NewOpc = AMDGPU::getCommuteOrig(Opcode);
Marek Olsakcfbdba22015-06-26 20:29:10 +0000535 if (NewOpc != -1)
536 // Check if the original (non-REV) opcode exists on the target.
537 return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
Christian Konig3c145802013-03-27 09:12:59 +0000538
539 return Opcode;
540}
541
Jan Sjodina06bfe02017-05-15 20:18:37 +0000542void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB,
543 MachineBasicBlock::iterator MI,
544 const DebugLoc &DL, unsigned DestReg,
545 int64_t Value) const {
546 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
547 const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg);
548 if (RegClass == &AMDGPU::SReg_32RegClass ||
549 RegClass == &AMDGPU::SGPR_32RegClass ||
550 RegClass == &AMDGPU::SReg_32_XM0RegClass ||
551 RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
552 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
553 .addImm(Value);
554 return;
555 }
556
557 if (RegClass == &AMDGPU::SReg_64RegClass ||
558 RegClass == &AMDGPU::SGPR_64RegClass ||
559 RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
560 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
561 .addImm(Value);
562 return;
563 }
564
565 if (RegClass == &AMDGPU::VGPR_32RegClass) {
566 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
567 .addImm(Value);
568 return;
569 }
570 if (RegClass == &AMDGPU::VReg_64RegClass) {
571 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg)
572 .addImm(Value);
573 return;
574 }
575
576 unsigned EltSize = 4;
577 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
578 if (RI.isSGPRClass(RegClass)) {
579 if (RI.getRegSizeInBits(*RegClass) > 32) {
580 Opcode = AMDGPU::S_MOV_B64;
581 EltSize = 8;
582 } else {
583 Opcode = AMDGPU::S_MOV_B32;
584 EltSize = 4;
585 }
586 }
587
588 ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize);
589 for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
590 int64_t IdxValue = Idx == 0 ? Value : 0;
591
592 MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
593 get(Opcode), RI.getSubReg(DestReg, Idx));
594 Builder.addImm(IdxValue);
595 }
596}
597
598const TargetRegisterClass *
599SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const {
600 return &AMDGPU::VGPR_32RegClass;
601}
602
603void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
604 MachineBasicBlock::iterator I,
605 const DebugLoc &DL, unsigned DstReg,
606 ArrayRef<MachineOperand> Cond,
607 unsigned TrueReg,
608 unsigned FalseReg) const {
609 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
NAKAMURA Takumi994a43d2017-05-16 04:01:23 +0000610 assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
611 "Not a VGPR32 reg");
Jan Sjodina06bfe02017-05-15 20:18:37 +0000612
613 if (Cond.size() == 1) {
614 BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
615 .addReg(FalseReg)
616 .addReg(TrueReg)
617 .add(Cond[0]);
618 } else if (Cond.size() == 2) {
619 assert(Cond[0].isImm() && "Cond[0] is not an immediate");
620 switch (Cond[0].getImm()) {
621 case SIInstrInfo::SCC_TRUE: {
622 unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
623 BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
624 .addImm(-1)
625 .addImm(0);
626 BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
627 .addReg(FalseReg)
628 .addReg(TrueReg)
629 .addReg(SReg);
630 break;
631 }
632 case SIInstrInfo::SCC_FALSE: {
633 unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
634 BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
635 .addImm(0)
636 .addImm(-1);
637 BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
638 .addReg(FalseReg)
639 .addReg(TrueReg)
640 .addReg(SReg);
641 break;
642 }
643 case SIInstrInfo::VCCNZ: {
644 MachineOperand RegOp = Cond[1];
645 RegOp.setImplicit(false);
646 BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
647 .addReg(FalseReg)
648 .addReg(TrueReg)
649 .add(RegOp);
650 break;
651 }
652 case SIInstrInfo::VCCZ: {
653 MachineOperand RegOp = Cond[1];
654 RegOp.setImplicit(false);
655 BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
656 .addReg(TrueReg)
657 .addReg(FalseReg)
658 .add(RegOp);
659 break;
660 }
661 case SIInstrInfo::EXECNZ: {
662 unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
663 unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
664 BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
665 .addImm(0);
666 BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
667 .addImm(-1)
668 .addImm(0);
669 BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
670 .addReg(FalseReg)
671 .addReg(TrueReg)
672 .addReg(SReg);
673 break;
674 }
675 case SIInstrInfo::EXECZ: {
676 unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
677 unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
678 BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
679 .addImm(0);
680 BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
681 .addImm(0)
682 .addImm(-1);
683 BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
684 .addReg(FalseReg)
685 .addReg(TrueReg)
686 .addReg(SReg);
687 llvm_unreachable("Unhandled branch predicate EXECZ");
688 break;
689 }
690 default:
691 llvm_unreachable("invalid branch predicate");
692 }
693 } else {
694 llvm_unreachable("Can only handle Cond size 1 or 2");
695 }
696}
697
698unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
699 MachineBasicBlock::iterator I,
700 const DebugLoc &DL,
701 unsigned SrcReg, int Value) const {
702 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
703 unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
704 BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
705 .addImm(Value)
706 .addReg(SrcReg);
707
708 return Reg;
709}
710
711unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
712 MachineBasicBlock::iterator I,
713 const DebugLoc &DL,
714 unsigned SrcReg, int Value) const {
715 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
716 unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
717 BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
718 .addImm(Value)
719 .addReg(SrcReg);
720
721 return Reg;
722}
723
Tom Stellardef3b8642015-01-07 19:56:17 +0000724unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
725
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000726 if (RI.getRegSizeInBits(*DstRC) == 32) {
Tom Stellardef3b8642015-01-07 19:56:17 +0000727 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000728 } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
Tom Stellardef3b8642015-01-07 19:56:17 +0000729 return AMDGPU::S_MOV_B64;
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000730 } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
Tom Stellard4842c052015-01-07 20:27:25 +0000731 return AMDGPU::V_MOV_B64_PSEUDO;
Tom Stellardef3b8642015-01-07 19:56:17 +0000732 }
733 return AMDGPU::COPY;
734}
735
Matt Arsenault08f14de2015-11-06 18:07:53 +0000736static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
737 switch (Size) {
738 case 4:
739 return AMDGPU::SI_SPILL_S32_SAVE;
740 case 8:
741 return AMDGPU::SI_SPILL_S64_SAVE;
742 case 16:
743 return AMDGPU::SI_SPILL_S128_SAVE;
744 case 32:
745 return AMDGPU::SI_SPILL_S256_SAVE;
746 case 64:
747 return AMDGPU::SI_SPILL_S512_SAVE;
748 default:
749 llvm_unreachable("unknown register size");
750 }
751}
752
753static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
754 switch (Size) {
755 case 4:
756 return AMDGPU::SI_SPILL_V32_SAVE;
757 case 8:
758 return AMDGPU::SI_SPILL_V64_SAVE;
Tom Stellard703b2ec2016-04-12 23:57:30 +0000759 case 12:
760 return AMDGPU::SI_SPILL_V96_SAVE;
Matt Arsenault08f14de2015-11-06 18:07:53 +0000761 case 16:
762 return AMDGPU::SI_SPILL_V128_SAVE;
763 case 32:
764 return AMDGPU::SI_SPILL_V256_SAVE;
765 case 64:
766 return AMDGPU::SI_SPILL_V512_SAVE;
767 default:
768 llvm_unreachable("unknown register size");
769 }
770}
771
Tom Stellardc149dc02013-11-27 21:23:35 +0000772void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
773 MachineBasicBlock::iterator MI,
774 unsigned SrcReg, bool isKill,
775 int FrameIndex,
776 const TargetRegisterClass *RC,
777 const TargetRegisterInfo *TRI) const {
Tom Stellard4e07b1d2014-06-10 21:20:41 +0000778 MachineFunction *MF = MBB.getParent();
Tom Stellard42fb60e2015-01-14 15:42:31 +0000779 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
Matthias Braun941a7052016-07-28 18:40:00 +0000780 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
Tom Stellardc149dc02013-11-27 21:23:35 +0000781 DebugLoc DL = MBB.findDebugLoc(MI);
Matt Arsenault08f14de2015-11-06 18:07:53 +0000782
Matthias Braun941a7052016-07-28 18:40:00 +0000783 unsigned Size = FrameInfo.getObjectSize(FrameIndex);
784 unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
Matt Arsenault08f14de2015-11-06 18:07:53 +0000785 MachinePointerInfo PtrInfo
786 = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
787 MachineMemOperand *MMO
788 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
789 Size, Align);
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000790 unsigned SpillSize = TRI->getSpillSize(*RC);
Tom Stellardc149dc02013-11-27 21:23:35 +0000791
Tom Stellard96468902014-09-24 01:33:17 +0000792 if (RI.isSGPRClass(RC)) {
Matt Arsenault5b22dfa2015-11-05 05:27:10 +0000793 MFI->setHasSpilledSGPRs();
794
Matt Arsenault2510a312016-09-03 06:57:55 +0000795 // We are only allowed to create one new instruction when spilling
796 // registers, so we need to use pseudo instruction for spilling SGPRs.
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000797 const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize));
Matt Arsenault2510a312016-09-03 06:57:55 +0000798
799 // The SGPR spill/restore instructions only work on number sgprs, so we need
800 // to make sure we are using the correct register class.
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000801 if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) {
Matt Arsenaultb6e1cc22016-05-21 00:53:42 +0000802 MachineRegisterInfo &MRI = MF->getRegInfo();
803 MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
804 }
805
Marek Olsak79c05872016-11-25 17:37:09 +0000806 MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
Matt Arsenault3354f422016-09-10 01:20:33 +0000807 .addReg(SrcReg, getKillRegState(isKill)) // data
808 .addFrameIndex(FrameIndex) // addr
Matt Arsenault08906a32016-10-28 19:43:31 +0000809 .addMemOperand(MMO)
810 .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
Matt Arsenaultea8a4ed2017-05-17 19:37:57 +0000811 .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
Matt Arsenault08906a32016-10-28 19:43:31 +0000812 // Add the scratch resource registers as implicit uses because we may end up
813 // needing them, and need to ensure that the reserved registers are
814 // correctly handled.
Tom Stellard42fb60e2015-01-14 15:42:31 +0000815
Matt Arsenaultdb782732017-07-20 21:03:45 +0000816 FrameInfo.setStackID(FrameIndex, 1);
Marek Olsak79c05872016-11-25 17:37:09 +0000817 if (ST.hasScalarStores()) {
818 // m0 is used for offset to scalar stores if used to spill.
Nicolai Haehnle43cc6c42017-06-27 08:04:13 +0000819 Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
Marek Olsak79c05872016-11-25 17:37:09 +0000820 }
821
Matt Arsenault08f14de2015-11-06 18:07:53 +0000822 return;
Tom Stellard96468902014-09-24 01:33:17 +0000823 }
Tom Stellardeba61072014-05-02 15:41:42 +0000824
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +0000825 if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
Tom Stellard96468902014-09-24 01:33:17 +0000826 LLVMContext &Ctx = MF->getFunction()->getContext();
827 Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
828 " spill register");
Tom Stellard0febe682015-01-14 15:42:34 +0000829 BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
Matt Arsenault08f14de2015-11-06 18:07:53 +0000830 .addReg(SrcReg);
831
832 return;
833 }
834
835 assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
836
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000837 unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
Matt Arsenault08f14de2015-11-06 18:07:53 +0000838 MFI->setHasSpilledVGPRs();
839 BuildMI(MBB, MI, DL, get(Opcode))
Matt Arsenault3354f422016-09-10 01:20:33 +0000840 .addReg(SrcReg, getKillRegState(isKill)) // data
841 .addFrameIndex(FrameIndex) // addr
Matt Arsenault2510a312016-09-03 06:57:55 +0000842 .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
Matt Arsenaultea8a4ed2017-05-17 19:37:57 +0000843 .addReg(MFI->getFrameOffsetReg()) // scratch_offset
Matt Arsenault2510a312016-09-03 06:57:55 +0000844 .addImm(0) // offset
Matt Arsenault08f14de2015-11-06 18:07:53 +0000845 .addMemOperand(MMO);
846}
847
848static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
849 switch (Size) {
850 case 4:
851 return AMDGPU::SI_SPILL_S32_RESTORE;
852 case 8:
853 return AMDGPU::SI_SPILL_S64_RESTORE;
854 case 16:
855 return AMDGPU::SI_SPILL_S128_RESTORE;
856 case 32:
857 return AMDGPU::SI_SPILL_S256_RESTORE;
858 case 64:
859 return AMDGPU::SI_SPILL_S512_RESTORE;
860 default:
861 llvm_unreachable("unknown register size");
862 }
863}
864
865static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
866 switch (Size) {
867 case 4:
868 return AMDGPU::SI_SPILL_V32_RESTORE;
869 case 8:
870 return AMDGPU::SI_SPILL_V64_RESTORE;
Tom Stellard703b2ec2016-04-12 23:57:30 +0000871 case 12:
872 return AMDGPU::SI_SPILL_V96_RESTORE;
Matt Arsenault08f14de2015-11-06 18:07:53 +0000873 case 16:
874 return AMDGPU::SI_SPILL_V128_RESTORE;
875 case 32:
876 return AMDGPU::SI_SPILL_V256_RESTORE;
877 case 64:
878 return AMDGPU::SI_SPILL_V512_RESTORE;
879 default:
880 llvm_unreachable("unknown register size");
Tom Stellardc149dc02013-11-27 21:23:35 +0000881 }
882}
883
884void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
885 MachineBasicBlock::iterator MI,
886 unsigned DestReg, int FrameIndex,
887 const TargetRegisterClass *RC,
888 const TargetRegisterInfo *TRI) const {
Tom Stellard4e07b1d2014-06-10 21:20:41 +0000889 MachineFunction *MF = MBB.getParent();
Tom Stellarde99fb652015-01-20 19:33:04 +0000890 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
Matthias Braun941a7052016-07-28 18:40:00 +0000891 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
Tom Stellardc149dc02013-11-27 21:23:35 +0000892 DebugLoc DL = MBB.findDebugLoc(MI);
Matthias Braun941a7052016-07-28 18:40:00 +0000893 unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
894 unsigned Size = FrameInfo.getObjectSize(FrameIndex);
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000895 unsigned SpillSize = TRI->getSpillSize(*RC);
Tom Stellard4e07b1d2014-06-10 21:20:41 +0000896
Matt Arsenault08f14de2015-11-06 18:07:53 +0000897 MachinePointerInfo PtrInfo
898 = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
899
900 MachineMemOperand *MMO = MF->getMachineMemOperand(
901 PtrInfo, MachineMemOperand::MOLoad, Size, Align);
902
903 if (RI.isSGPRClass(RC)) {
904 // FIXME: Maybe this should not include a memoperand because it will be
905 // lowered to non-memory instructions.
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000906 const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
907 if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) {
Matt Arsenaultb6e1cc22016-05-21 00:53:42 +0000908 MachineRegisterInfo &MRI = MF->getRegInfo();
909 MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
910 }
911
Matt Arsenaultdb782732017-07-20 21:03:45 +0000912 FrameInfo.setStackID(FrameIndex, 1);
Marek Olsak79c05872016-11-25 17:37:09 +0000913 MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
Matt Arsenault3354f422016-09-10 01:20:33 +0000914 .addFrameIndex(FrameIndex) // addr
Matt Arsenault08906a32016-10-28 19:43:31 +0000915 .addMemOperand(MMO)
916 .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
Matt Arsenaultea8a4ed2017-05-17 19:37:57 +0000917 .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
Matt Arsenault08f14de2015-11-06 18:07:53 +0000918
Marek Olsak79c05872016-11-25 17:37:09 +0000919 if (ST.hasScalarStores()) {
920 // m0 is used for offset to scalar stores if used to spill.
Nicolai Haehnle43cc6c42017-06-27 08:04:13 +0000921 Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
Marek Olsak79c05872016-11-25 17:37:09 +0000922 }
923
Matt Arsenault08f14de2015-11-06 18:07:53 +0000924 return;
Tom Stellard96468902014-09-24 01:33:17 +0000925 }
Tom Stellardeba61072014-05-02 15:41:42 +0000926
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +0000927 if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
Tom Stellard96468902014-09-24 01:33:17 +0000928 LLVMContext &Ctx = MF->getFunction()->getContext();
929 Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
930 " restore register");
Tom Stellard0febe682015-01-14 15:42:34 +0000931 BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
Matt Arsenault08f14de2015-11-06 18:07:53 +0000932
933 return;
Tom Stellardc149dc02013-11-27 21:23:35 +0000934 }
Matt Arsenault08f14de2015-11-06 18:07:53 +0000935
936 assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
937
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +0000938 unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
Matt Arsenault08f14de2015-11-06 18:07:53 +0000939 BuildMI(MBB, MI, DL, get(Opcode), DestReg)
Matt Arsenaultea8a4ed2017-05-17 19:37:57 +0000940 .addFrameIndex(FrameIndex) // vaddr
941 .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
942 .addReg(MFI->getFrameOffsetReg()) // scratch_offset
943 .addImm(0) // offset
Matt Arsenault08f14de2015-11-06 18:07:53 +0000944 .addMemOperand(MMO);
Tom Stellardc149dc02013-11-27 21:23:35 +0000945}
946
Tom Stellard96468902014-09-24 01:33:17 +0000947/// \param @Offset Offset in bytes of the FrameIndex being spilled
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000948unsigned SIInstrInfo::calculateLDSSpillAddress(
949 MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg,
950 unsigned FrameOffset, unsigned Size) const {
Tom Stellard96468902014-09-24 01:33:17 +0000951 MachineFunction *MF = MBB.getParent();
952 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000953 const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
Tom Stellard96468902014-09-24 01:33:17 +0000954 DebugLoc DL = MBB.findDebugLoc(MI);
Konstantin Zhuravlyov1d650262016-09-06 20:22:28 +0000955 unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
Tom Stellard96468902014-09-24 01:33:17 +0000956 unsigned WavefrontSize = ST.getWavefrontSize();
957
958 unsigned TIDReg = MFI->getTIDReg();
959 if (!MFI->hasCalculatedTID()) {
960 MachineBasicBlock &Entry = MBB.getParent()->front();
961 MachineBasicBlock::iterator Insert = Entry.front();
962 DebugLoc DL = Insert->getDebugLoc();
963
Tom Stellard19f43012016-07-28 14:30:43 +0000964 TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
965 *MF);
Tom Stellard96468902014-09-24 01:33:17 +0000966 if (TIDReg == AMDGPU::NoRegister)
967 return TIDReg;
968
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +0000969 if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) &&
Tom Stellard96468902014-09-24 01:33:17 +0000970 WorkGroupSize > WavefrontSize) {
Matt Arsenaultac234b62015-11-30 21:15:57 +0000971 unsigned TIDIGXReg
Matt Arsenault8623e8d2017-08-03 23:00:29 +0000972 = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
Matt Arsenaultac234b62015-11-30 21:15:57 +0000973 unsigned TIDIGYReg
Matt Arsenault8623e8d2017-08-03 23:00:29 +0000974 = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
Matt Arsenaultac234b62015-11-30 21:15:57 +0000975 unsigned TIDIGZReg
Matt Arsenault8623e8d2017-08-03 23:00:29 +0000976 = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
Tom Stellard96468902014-09-24 01:33:17 +0000977 unsigned InputPtrReg =
Matt Arsenault8623e8d2017-08-03 23:00:29 +0000978 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
Benjamin Kramer7149aab2015-03-01 18:09:56 +0000979 for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
Tom Stellard96468902014-09-24 01:33:17 +0000980 if (!Entry.isLiveIn(Reg))
981 Entry.addLiveIn(Reg);
982 }
983
Matthias Braun7dc03f02016-04-06 02:47:09 +0000984 RS->enterBasicBlock(Entry);
Matt Arsenault0c90e952015-11-06 18:17:45 +0000985 // FIXME: Can we scavenge an SReg_64 and access the subregs?
Tom Stellard96468902014-09-24 01:33:17 +0000986 unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
987 unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
988 BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
989 .addReg(InputPtrReg)
990 .addImm(SI::KernelInputOffsets::NGROUPS_Z);
991 BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
992 .addReg(InputPtrReg)
993 .addImm(SI::KernelInputOffsets::NGROUPS_Y);
994
995 // NGROUPS.X * NGROUPS.Y
996 BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
997 .addReg(STmp1)
998 .addReg(STmp0);
999 // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
1000 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
1001 .addReg(STmp1)
1002 .addReg(TIDIGXReg);
1003 // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)
1004 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
1005 .addReg(STmp0)
1006 .addReg(TIDIGYReg)
1007 .addReg(TIDReg);
1008 // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)) + TIDIG.Z
1009 BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
1010 .addReg(TIDReg)
1011 .addReg(TIDIGZReg);
1012 } else {
1013 // Get the wave id
1014 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
1015 TIDReg)
1016 .addImm(-1)
1017 .addImm(0);
1018
Marek Olsakc5368502015-01-15 18:43:01 +00001019 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
Tom Stellard96468902014-09-24 01:33:17 +00001020 TIDReg)
1021 .addImm(-1)
1022 .addReg(TIDReg);
1023 }
1024
1025 BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
1026 TIDReg)
1027 .addImm(2)
1028 .addReg(TIDReg);
1029 MFI->setTIDReg(TIDReg);
1030 }
1031
1032 // Add FrameIndex to LDS offset
Matt Arsenault52ef4012016-07-26 16:45:58 +00001033 unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
Tom Stellard96468902014-09-24 01:33:17 +00001034 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
1035 .addImm(LDSOffset)
1036 .addReg(TIDReg);
1037
1038 return TmpReg;
1039}
1040
Tom Stellardd37630e2016-04-07 14:47:07 +00001041void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
1042 MachineBasicBlock::iterator MI,
Nicolai Haehnle87323da2015-12-17 16:46:42 +00001043 int Count) const {
Tom Stellard341e2932016-05-02 18:02:24 +00001044 DebugLoc DL = MBB.findDebugLoc(MI);
Tom Stellardeba61072014-05-02 15:41:42 +00001045 while (Count > 0) {
1046 int Arg;
1047 if (Count >= 8)
1048 Arg = 7;
1049 else
1050 Arg = Count - 1;
1051 Count -= 8;
Tom Stellard341e2932016-05-02 18:02:24 +00001052 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
Tom Stellardeba61072014-05-02 15:41:42 +00001053 .addImm(Arg);
1054 }
1055}
1056
Tom Stellardcb6ba622016-04-30 00:23:06 +00001057void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
1058 MachineBasicBlock::iterator MI) const {
1059 insertWaitStates(MBB, MI, 1);
1060}
1061
Jan Sjodina06bfe02017-05-15 20:18:37 +00001062void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
1063 auto MF = MBB.getParent();
1064 SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
1065
1066 assert(Info->isEntryFunction());
1067
1068 if (MBB.succ_empty()) {
1069 bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
1070 if (HasNoTerminator)
1071 BuildMI(MBB, MBB.end(), DebugLoc(),
1072 get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG));
1073 }
1074}
1075
Tom Stellardcb6ba622016-04-30 00:23:06 +00001076unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
1077 switch (MI.getOpcode()) {
1078 default: return 1; // FIXME: Do wait states equal cycles?
1079
1080 case AMDGPU::S_NOP:
1081 return MI.getOperand(0).getImm() + 1;
1082 }
1083}
1084
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001085bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1086 MachineBasicBlock &MBB = *MI.getParent();
Tom Stellardeba61072014-05-02 15:41:42 +00001087 DebugLoc DL = MBB.findDebugLoc(MI);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001088 switch (MI.getOpcode()) {
Tom Stellardeba61072014-05-02 15:41:42 +00001089 default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
Eugene Zelenko59e12822017-08-08 00:47:13 +00001090 case AMDGPU::S_MOV_B64_term:
Matt Arsenaulte6740752016-09-29 01:44:16 +00001091 // This is only a terminator to get the correct spill code placement during
1092 // register allocation.
1093 MI.setDesc(get(AMDGPU::S_MOV_B64));
1094 break;
Eugene Zelenko59e12822017-08-08 00:47:13 +00001095
1096 case AMDGPU::S_XOR_B64_term:
Matt Arsenaulte6740752016-09-29 01:44:16 +00001097 // This is only a terminator to get the correct spill code placement during
1098 // register allocation.
1099 MI.setDesc(get(AMDGPU::S_XOR_B64));
1100 break;
Eugene Zelenko59e12822017-08-08 00:47:13 +00001101
1102 case AMDGPU::S_ANDN2_B64_term:
Matt Arsenaulte6740752016-09-29 01:44:16 +00001103 // This is only a terminator to get the correct spill code placement during
1104 // register allocation.
1105 MI.setDesc(get(AMDGPU::S_ANDN2_B64));
1106 break;
Eugene Zelenko59e12822017-08-08 00:47:13 +00001107
Tom Stellard4842c052015-01-07 20:27:25 +00001108 case AMDGPU::V_MOV_B64_PSEUDO: {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001109 unsigned Dst = MI.getOperand(0).getReg();
Tom Stellard4842c052015-01-07 20:27:25 +00001110 unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
1111 unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
1112
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001113 const MachineOperand &SrcOp = MI.getOperand(1);
Tom Stellard4842c052015-01-07 20:27:25 +00001114 // FIXME: Will this work for 64-bit floating point immediates?
1115 assert(!SrcOp.isFPImm());
1116 if (SrcOp.isImm()) {
1117 APInt Imm(64, SrcOp.getImm());
1118 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
Matt Arsenault80bc3552016-06-13 15:53:52 +00001119 .addImm(Imm.getLoBits(32).getZExtValue())
1120 .addReg(Dst, RegState::Implicit | RegState::Define);
Tom Stellard4842c052015-01-07 20:27:25 +00001121 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
Matt Arsenault80bc3552016-06-13 15:53:52 +00001122 .addImm(Imm.getHiBits(32).getZExtValue())
1123 .addReg(Dst, RegState::Implicit | RegState::Define);
Tom Stellard4842c052015-01-07 20:27:25 +00001124 } else {
1125 assert(SrcOp.isReg());
1126 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
Matt Arsenault80bc3552016-06-13 15:53:52 +00001127 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
1128 .addReg(Dst, RegState::Implicit | RegState::Define);
Tom Stellard4842c052015-01-07 20:27:25 +00001129 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
Matt Arsenault80bc3552016-06-13 15:53:52 +00001130 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
1131 .addReg(Dst, RegState::Implicit | RegState::Define);
Tom Stellard4842c052015-01-07 20:27:25 +00001132 }
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001133 MI.eraseFromParent();
Tom Stellard4842c052015-01-07 20:27:25 +00001134 break;
1135 }
Connor Abbott66b9bd62017-08-04 18:36:54 +00001136 case AMDGPU::V_SET_INACTIVE_B32: {
1137 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1138 .addReg(AMDGPU::EXEC);
1139 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
1140 .add(MI.getOperand(2));
1141 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1142 .addReg(AMDGPU::EXEC);
1143 MI.eraseFromParent();
1144 break;
1145 }
1146 case AMDGPU::V_SET_INACTIVE_B64: {
1147 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1148 .addReg(AMDGPU::EXEC);
1149 MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
1150 MI.getOperand(0).getReg())
1151 .add(MI.getOperand(2));
1152 expandPostRAPseudo(*Copy);
1153 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1154 .addReg(AMDGPU::EXEC);
1155 MI.eraseFromParent();
1156 break;
1157 }
Nicolai Haehnlea7852092016-10-24 14:56:02 +00001158 case AMDGPU::V_MOVRELD_B32_V1:
1159 case AMDGPU::V_MOVRELD_B32_V2:
1160 case AMDGPU::V_MOVRELD_B32_V4:
1161 case AMDGPU::V_MOVRELD_B32_V8:
1162 case AMDGPU::V_MOVRELD_B32_V16: {
1163 const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
1164 unsigned VecReg = MI.getOperand(0).getReg();
1165 bool IsUndef = MI.getOperand(1).isUndef();
1166 unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
1167 assert(VecReg == MI.getOperand(1).getReg());
1168
1169 MachineInstr *MovRel =
1170 BuildMI(MBB, MI, DL, MovRelDesc)
1171 .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
Diana Picus116bbab2017-01-13 09:58:52 +00001172 .add(MI.getOperand(2))
Nicolai Haehnlea7852092016-10-24 14:56:02 +00001173 .addReg(VecReg, RegState::ImplicitDefine)
Diana Picus116bbab2017-01-13 09:58:52 +00001174 .addReg(VecReg,
1175 RegState::Implicit | (IsUndef ? RegState::Undef : 0));
Nicolai Haehnlea7852092016-10-24 14:56:02 +00001176
1177 const int ImpDefIdx =
1178 MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
1179 const int ImpUseIdx = ImpDefIdx + 1;
1180 MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
1181
1182 MI.eraseFromParent();
1183 break;
1184 }
Tom Stellardbf3e6e52016-06-14 20:29:59 +00001185 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
Tom Stellardc93fc112015-12-10 02:13:01 +00001186 MachineFunction &MF = *MBB.getParent();
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001187 unsigned Reg = MI.getOperand(0).getReg();
Matt Arsenault11587d92016-08-10 19:11:45 +00001188 unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
1189 unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
Tom Stellardc93fc112015-12-10 02:13:01 +00001190
1191 // Create a bundle so these instructions won't be re-ordered by the
1192 // post-RA scheduler.
1193 MIBundleBuilder Bundler(MBB, MI);
1194 Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
1195
1196 // Add 32-bit offset from this instruction to the start of the
1197 // constant data.
1198 Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001199 .addReg(RegLo)
Diana Picus116bbab2017-01-13 09:58:52 +00001200 .add(MI.getOperand(1)));
Tom Stellardc93fc112015-12-10 02:13:01 +00001201
Konstantin Zhuravlyovc96b5d72016-10-14 04:37:34 +00001202 MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
1203 .addReg(RegHi);
1204 if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE)
1205 MIB.addImm(0);
1206 else
Diana Picus116bbab2017-01-13 09:58:52 +00001207 MIB.add(MI.getOperand(2));
Konstantin Zhuravlyovc96b5d72016-10-14 04:37:34 +00001208
1209 Bundler.append(MIB);
Eugene Zelenko59e12822017-08-08 00:47:13 +00001210 finalizeBundle(MBB, Bundler.begin());
Tom Stellardc93fc112015-12-10 02:13:01 +00001211
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001212 MI.eraseFromParent();
Tom Stellardc93fc112015-12-10 02:13:01 +00001213 break;
1214 }
Connor Abbott92638ab2017-08-04 18:36:52 +00001215 case AMDGPU::EXIT_WWM: {
1216 // This only gets its own opcode so that SIFixWWMLiveness can tell when WWM
1217 // is exited.
1218 MI.setDesc(get(AMDGPU::S_MOV_B64));
1219 break;
1220 }
Tom Stellardeba61072014-05-02 15:41:42 +00001221 }
1222 return true;
1223}
1224
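// Swap the src0 and src1 source-modifier immediates when commuting an
// instruction; returns false if the instruction has no modifier operands.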
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001225bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
1226 MachineOperand &Src0,
1227 unsigned Src0OpName,
1228 MachineOperand &Src1,
1229 unsigned Src1OpName) const {
1230 MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName);
1231 if (!Src0Mods)
1232 return false;
1233
1234 MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName);
1235 assert(Src1Mods &&
1236 "All commutable instructions have both src0 and src1 modifiers");
1237
1238 int Src0ModsVal = Src0Mods->getImm();
1239 int Src1ModsVal = Src1Mods->getImm();
1240
1241 Src1Mods->setImm(Src0ModsVal);
1242 Src0Mods->setImm(Src1ModsVal);
1243 return true;
1244}
1245
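// Commute a register operand with an immediate or frame-index operand by
// rewriting both operands in place. Returns nullptr if the non-register
// operand kind is not handled.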
1246static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
1247 MachineOperand &RegOp,
Matt Arsenault25dba302016-09-13 19:03:12 +00001248 MachineOperand &NonRegOp) {
1249 unsigned Reg = RegOp.getReg();
1250 unsigned SubReg = RegOp.getSubReg();
1251 bool IsKill = RegOp.isKill();
1252 bool IsDead = RegOp.isDead();
1253 bool IsUndef = RegOp.isUndef();
1254 bool IsDebug = RegOp.isDebug();
1255
1256 if (NonRegOp.isImm())
1257 RegOp.ChangeToImmediate(NonRegOp.getImm());
1258 else if (NonRegOp.isFI())
1259 RegOp.ChangeToFrameIndex(NonRegOp.getIndex());
1260 else
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001261 return nullptr;
1262
Matt Arsenault25dba302016-09-13 19:03:12 +00001263 NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug);
1264 NonRegOp.setSubReg(SubReg);
1265
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001266 return &MI;
1267}
1268
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001269MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001270 unsigned Src0Idx,
1271 unsigned Src1Idx) const {
1272 assert(!NewMI && "this should never be used");
1273
1274 unsigned Opc = MI.getOpcode();
1275 int CommutedOpcode = commuteOpcode(Opc);
Marek Olsakcfbdba22015-06-26 20:29:10 +00001276 if (CommutedOpcode == -1)
1277 return nullptr;
1278
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001279 assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
1280 static_cast<int>(Src0Idx) &&
1281 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
1282 static_cast<int>(Src1Idx) &&
1283 "inconsistency with findCommutedOpIndices");
1284
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001285 MachineOperand &Src0 = MI.getOperand(Src0Idx);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001286 MachineOperand &Src1 = MI.getOperand(Src1Idx);
Matt Arsenaultaa5ccfb2014-10-17 18:00:37 +00001287
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001288 MachineInstr *CommutedMI = nullptr;
1289 if (Src0.isReg() && Src1.isReg()) {
1290 if (isOperandLegal(MI, Src1Idx, &Src0)) {
1291 // Be sure to copy the source modifiers to the right place.
1292 CommutedMI
1293 = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
Matt Arsenaultd282ada2014-10-17 18:00:48 +00001294 }
1295
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001296 } else if (Src0.isReg() && !Src1.isReg()) {
1297 // src0 should always be able to support any operand type, so no need to
1298 // check operand legality.
1299 CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
1300 } else if (!Src0.isReg() && Src1.isReg()) {
1301 if (isOperandLegal(MI, Src1Idx, &Src0))
1302 CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
Tom Stellard82166022013-11-13 23:36:37 +00001303 } else {
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001304 // FIXME: Found two non-register operands to commute. This does happen.
1305 return nullptr;
Tom Stellard82166022013-11-13 23:36:37 +00001306 }
Christian Konig3c145802013-03-27 09:12:59 +00001307
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001308 if (CommutedMI) {
1309 swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
1310 Src1, AMDGPU::OpName::src1_modifiers);
1311
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001312 CommutedMI->setDesc(get(CommutedOpcode));
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001313 }
Christian Konig3c145802013-03-27 09:12:59 +00001314
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001315 return CommutedMI;
Christian Konig76edd4f2013-02-26 17:52:29 +00001316}
1317
Matt Arsenault92befe72014-09-26 17:54:54 +00001318// This needs to be implemented because the source modifiers may be inserted
1319// between the true commutable operands, and the base
1320// TargetInstrInfo::commuteInstruction uses it.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001321bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
Andrew Kaylor16c4da02015-09-28 20:33:22 +00001322 unsigned &SrcOpIdx1) const {
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001323 if (!MI.isCommutable())
Matt Arsenault92befe72014-09-26 17:54:54 +00001324 return false;
1325
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001326 unsigned Opc = MI.getOpcode();
Matt Arsenault92befe72014-09-26 17:54:54 +00001327 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
1328 if (Src0Idx == -1)
1329 return false;
1330
Matt Arsenault92befe72014-09-26 17:54:54 +00001331 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
1332 if (Src1Idx == -1)
1333 return false;
1334
Andrew Kaylor16c4da02015-09-28 20:33:22 +00001335 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
Matt Arsenault92befe72014-09-26 17:54:54 +00001336}
1337
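// Return true if the given byte offset fits in the branch instruction's
// signed dword displacement field.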
Matt Arsenault6bc43d82016-10-06 16:20:41 +00001338bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
1339 int64_t BrOffset) const {
1340 // BranchRelaxation should never have to check s_setpc_b64 because its dest
1341 // block is unanalyzable.
1342 assert(BranchOp != AMDGPU::S_SETPC_B64);
1343
1344 // Convert to dwords.
1345 BrOffset /= 4;
1346
1347 // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is
1348 // from the next instruction.
1349 BrOffset -= 1;
1350
1351 return isIntN(BranchOffsetBits, BrOffset);
1352}
1353
1354MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
1355 const MachineInstr &MI) const {
1356 if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {
1357 // This would be a difficult analysis to perform, but it is always legal, so
1358 // there's no need to analyze it.
1359 return nullptr;
1360 }
1361
1362 return MI.getOperand(0).getMBB();
1363}
1364
1365unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
1366 MachineBasicBlock &DestBB,
1367 const DebugLoc &DL,
1368 int64_t BrOffset,
1369 RegScavenger *RS) const {
1370 assert(RS && "RegScavenger required for long branching");
1371 assert(MBB.empty() &&
1372 "new block should be inserted for expanding unconditional branch");
1373 assert(MBB.pred_size() == 1);
1374
1375 MachineFunction *MF = MBB.getParent();
1376 MachineRegisterInfo &MRI = MF->getRegInfo();
1377
1378 // FIXME: Virtual register workaround for RegScavenger not working with empty
1379 // blocks.
1380 unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
1381
1382 auto I = MBB.end();
1383
1384 // We need to compute the offset relative to the instruction immediately after
1385 // s_getpc_b64. Insert pc arithmetic code before last terminator.
1386 MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg);
1387
1388 // TODO: Handle > 32-bit block address.
1389 if (BrOffset >= 0) {
1390 BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
1391 .addReg(PCReg, RegState::Define, AMDGPU::sub0)
1392 .addReg(PCReg, 0, AMDGPU::sub0)
1393 .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_FORWARD);
1394 BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
1395 .addReg(PCReg, RegState::Define, AMDGPU::sub1)
1396 .addReg(PCReg, 0, AMDGPU::sub1)
1397 .addImm(0);
1398 } else {
1399 // Backwards branch.
1400 BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
1401 .addReg(PCReg, RegState::Define, AMDGPU::sub0)
1402 .addReg(PCReg, 0, AMDGPU::sub0)
1403 .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_BACKWARD);
1404 BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
1405 .addReg(PCReg, RegState::Define, AMDGPU::sub1)
1406 .addReg(PCReg, 0, AMDGPU::sub1)
1407 .addImm(0);
1408 }
1409
1410 // Insert the indirect branch after the other terminator.
1411 BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
1412 .addReg(PCReg);
1413
1414 // FIXME: If spilling is necessary, this will fail because this scavenger has
1415 // no emergency stack slots. It is non-trivial to spill in this situation,
1416 // because the restore code needs to be specially placed after the
1417 // jump. BranchRelaxation then needs to be made aware of the newly inserted
1418 // block.
1419 //
1420 // If a spill is needed for the pc register pair, we need to insert a spill
1421 // restore block right before the destination block, and insert a short branch
1422 // into the old destination block's fallthrough predecessor.
1423 // e.g.:
1424 //
1425 // s_cbranch_scc0 skip_long_branch:
1426 //
1427 // long_branch_bb:
1428 // spill s[8:9]
1429 // s_getpc_b64 s[8:9]
1430 // s_add_u32 s8, s8, restore_bb
1431 // s_addc_u32 s9, s9, 0
1432 // s_setpc_b64 s[8:9]
1433 //
1434 // skip_long_branch:
1435 // foo;
1436 //
1437 // .....
1438 //
1439 // dest_bb_fallthrough_predecessor:
1440 // bar;
1441 // s_branch dest_bb
1442 //
1443 // restore_bb:
1444 // restore s[8:9]
1445 // fallthrough dest_bb
1446 //
1447 // dest_bb:
1448 // buzz;
1449
1450 RS->enterBasicBlockEnd(MBB);
1451 unsigned Scav = RS->scavengeRegister(&AMDGPU::SReg_64RegClass,
1452 MachineBasicBlock::iterator(GetPC), 0);
1453 MRI.replaceRegWith(PCReg, Scav);
1454 MRI.clearVirtRegs();
1455 RS->setRegUsed(Scav);
1456
1457 return 4 + 8 + 4 + 4;
1458}
1459
Matt Arsenault6d093802016-05-21 00:29:27 +00001460unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
1461 switch (Cond) {
1462 case SIInstrInfo::SCC_TRUE:
1463 return AMDGPU::S_CBRANCH_SCC1;
1464 case SIInstrInfo::SCC_FALSE:
1465 return AMDGPU::S_CBRANCH_SCC0;
Matt Arsenault49459052016-05-21 00:29:40 +00001466 case SIInstrInfo::VCCNZ:
1467 return AMDGPU::S_CBRANCH_VCCNZ;
1468 case SIInstrInfo::VCCZ:
1469 return AMDGPU::S_CBRANCH_VCCZ;
1470 case SIInstrInfo::EXECNZ:
1471 return AMDGPU::S_CBRANCH_EXECNZ;
1472 case SIInstrInfo::EXECZ:
1473 return AMDGPU::S_CBRANCH_EXECZ;
Matt Arsenault6d093802016-05-21 00:29:27 +00001474 default:
1475 llvm_unreachable("invalid branch predicate");
1476 }
1477}
1478
1479SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
1480 switch (Opcode) {
1481 case AMDGPU::S_CBRANCH_SCC0:
1482 return SCC_FALSE;
1483 case AMDGPU::S_CBRANCH_SCC1:
1484 return SCC_TRUE;
Matt Arsenault49459052016-05-21 00:29:40 +00001485 case AMDGPU::S_CBRANCH_VCCNZ:
1486 return VCCNZ;
1487 case AMDGPU::S_CBRANCH_VCCZ:
1488 return VCCZ;
1489 case AMDGPU::S_CBRANCH_EXECNZ:
1490 return EXECNZ;
1491 case AMDGPU::S_CBRANCH_EXECZ:
1492 return EXECZ;
Matt Arsenault6d093802016-05-21 00:29:27 +00001493 default:
1494 return INVALID_BR;
1495 }
1496}
1497
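// Shared helper for analyzeBranch: analyze the terminator sequence starting
// at I and fill in TBB/FBB/Cond.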
Matt Arsenault6bc43d82016-10-06 16:20:41 +00001498bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB,
1499 MachineBasicBlock::iterator I,
1500 MachineBasicBlock *&TBB,
1501 MachineBasicBlock *&FBB,
1502 SmallVectorImpl<MachineOperand> &Cond,
1503 bool AllowModify) const {
Matt Arsenault6d093802016-05-21 00:29:27 +00001504 if (I->getOpcode() == AMDGPU::S_BRANCH) {
1505 // Unconditional Branch
1506 TBB = I->getOperand(0).getMBB();
1507 return false;
1508 }
1509
Jan Sjodina06bfe02017-05-15 20:18:37 +00001510 MachineBasicBlock *CondBB = nullptr;
Matt Arsenault6d093802016-05-21 00:29:27 +00001511
Jan Sjodina06bfe02017-05-15 20:18:37 +00001512 if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
1513 CondBB = I->getOperand(1).getMBB();
1514 Cond.push_back(I->getOperand(0));
1515 } else {
1516 BranchPredicate Pred = getBranchPredicate(I->getOpcode());
1517 if (Pred == INVALID_BR)
1518 return true;
Matt Arsenault6d093802016-05-21 00:29:27 +00001519
Jan Sjodina06bfe02017-05-15 20:18:37 +00001520 CondBB = I->getOperand(0).getMBB();
1521 Cond.push_back(MachineOperand::CreateImm(Pred));
1522 Cond.push_back(I->getOperand(1)); // Save the branch register.
1523 }
Matt Arsenault6d093802016-05-21 00:29:27 +00001524 ++I;
1525
1526 if (I == MBB.end()) {
1527 // Conditional branch followed by fall-through.
1528 TBB = CondBB;
1529 return false;
1530 }
1531
1532 if (I->getOpcode() == AMDGPU::S_BRANCH) {
1533 TBB = CondBB;
1534 FBB = I->getOperand(0).getMBB();
1535 return false;
1536 }
1537
1538 return true;
1539}
1540
Matt Arsenault6bc43d82016-10-06 16:20:41 +00001541bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
1542 MachineBasicBlock *&FBB,
1543 SmallVectorImpl<MachineOperand> &Cond,
1544 bool AllowModify) const {
1545 MachineBasicBlock::iterator I = MBB.getFirstTerminator();
1546 if (I == MBB.end())
1547 return false;
1548
1549 if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
1550 return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
1551
1552 ++I;
1553
1554 // TODO: Should be able to treat as fallthrough?
1555 if (I == MBB.end())
1556 return true;
1557
1558 if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
1559 return true;
1560
1561 MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();
1562
1563 // Specifically handle the case where the conditional branch is to the same
1564 // destination as the mask branch. e.g.
1565 //
1566 // si_mask_branch BB8
1567 // s_cbranch_execz BB8
1568 // s_cbranch BB9
1569 //
1570 // This is required to understand divergent loops which may need the branches
1571 // to be relaxed.
1572 if (TBB != MaskBrDest || Cond.empty())
1573 return true;
1574
1575 auto Pred = Cond[0].getImm();
1576 return (Pred != EXECZ && Pred != EXECNZ);
1577}
1578
Matt Arsenault1b9fc8e2016-09-14 20:43:16 +00001579unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001580 int *BytesRemoved) const {
Matt Arsenault6d093802016-05-21 00:29:27 +00001581 MachineBasicBlock::iterator I = MBB.getFirstTerminator();
1582
1583 unsigned Count = 0;
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001584 unsigned RemovedSize = 0;
Matt Arsenault6d093802016-05-21 00:29:27 +00001585 while (I != MBB.end()) {
1586 MachineBasicBlock::iterator Next = std::next(I);
Matt Arsenault6bc43d82016-10-06 16:20:41 +00001587 if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
1588 I = Next;
1589 continue;
1590 }
1591
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001592 RemovedSize += getInstSizeInBytes(*I);
Matt Arsenault6d093802016-05-21 00:29:27 +00001593 I->eraseFromParent();
1594 ++Count;
1595 I = Next;
1596 }
1597
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001598 if (BytesRemoved)
1599 *BytesRemoved = RemovedSize;
1600
Matt Arsenault6d093802016-05-21 00:29:27 +00001601 return Count;
1602}
1603
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001604// Copy the flags onto the implicit condition register operand.
1605static void preserveCondRegFlags(MachineOperand &CondReg,
1606 const MachineOperand &OrigCond) {
1607 CondReg.setIsUndef(OrigCond.isUndef());
1608 CondReg.setIsKill(OrigCond.isKill());
1609}
1610
Matt Arsenaulte8e0f5c2016-09-14 17:24:15 +00001611unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
Matt Arsenault6d093802016-05-21 00:29:27 +00001612 MachineBasicBlock *TBB,
1613 MachineBasicBlock *FBB,
1614 ArrayRef<MachineOperand> Cond,
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001615 const DebugLoc &DL,
1616 int *BytesAdded) const {
Matt Arsenault6d093802016-05-21 00:29:27 +00001617 if (!FBB && Cond.empty()) {
1618 BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1619 .addMBB(TBB);
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001620 if (BytesAdded)
1621 *BytesAdded = 4;
Matt Arsenault6d093802016-05-21 00:29:27 +00001622 return 1;
1623 }
1624
Jan Sjodina06bfe02017-05-15 20:18:37 +00001625 if (Cond.size() == 1 && Cond[0].isReg()) {
1626 BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO))
1627 .add(Cond[0])
1628 .addMBB(TBB);
1629 return 1;
1630 }
1631
Matt Arsenault6d093802016-05-21 00:29:27 +00001632 assert(TBB && Cond[0].isImm());
1633
1634 unsigned Opcode
1635 = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
1636
1637 if (!FBB) {
1639 MachineInstr *CondBr =
1640 BuildMI(&MBB, DL, get(Opcode))
Matt Arsenault6d093802016-05-21 00:29:27 +00001641 .addMBB(TBB);
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001642
Matt Arsenault52f14ec2016-11-07 19:09:27 +00001643 // Copy the flags onto the implicit condition register operand.
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001644 preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);
Matt Arsenault52f14ec2016-11-07 19:09:27 +00001645
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001646 if (BytesAdded)
1647 *BytesAdded = 4;
Matt Arsenault6d093802016-05-21 00:29:27 +00001648 return 1;
1649 }
1650
1651 assert(TBB && FBB);
1652
Matt Arsenault52f14ec2016-11-07 19:09:27 +00001653 MachineInstr *CondBr =
1654 BuildMI(&MBB, DL, get(Opcode))
Matt Arsenault6d093802016-05-21 00:29:27 +00001655 .addMBB(TBB);
1656 BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1657 .addMBB(FBB);
1658
Matt Arsenault52f14ec2016-11-07 19:09:27 +00001659 MachineOperand &CondReg = CondBr->getOperand(1);
1660 CondReg.setIsUndef(Cond[1].isUndef());
1661 CondReg.setIsKill(Cond[1].isKill());
1662
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001663 if (BytesAdded)
1664 *BytesAdded = 8;
1665
Matt Arsenault6d093802016-05-21 00:29:27 +00001666 return 2;
1667}
1668
Matt Arsenault1b9fc8e2016-09-14 20:43:16 +00001669bool SIInstrInfo::reverseBranchCondition(
Matt Arsenault72fcd5f2016-05-21 00:29:34 +00001670 SmallVectorImpl<MachineOperand> &Cond) const {
Jan Sjodina06bfe02017-05-15 20:18:37 +00001671 if (Cond.size() != 2) {
1672 return true;
1673 }
1674
1675 if (Cond[0].isImm()) {
1676 Cond[0].setImm(-Cond[0].getImm());
1677 return false;
1678 }
1679
1680 return true;
Matt Arsenault72fcd5f2016-05-21 00:29:34 +00001681}
1682
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001683bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
1684 ArrayRef<MachineOperand> Cond,
1685 unsigned TrueReg, unsigned FalseReg,
1686 int &CondCycles,
1687 int &TrueCycles, int &FalseCycles) const {
1688 switch (Cond[0].getImm()) {
1689 case VCCNZ:
1690 case VCCZ: {
1691 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1692 const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
1693 assert(MRI.getRegClass(FalseReg) == RC);
1694
1695 int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
1696 CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
1697
1698 // Limit to equal cost for branch vs. N v_cndmask_b32s.
1699 return !RI.isSGPRClass(RC) && NumInsts <= 6;
1700 }
1701 case SCC_TRUE:
1702 case SCC_FALSE: {
1703 // FIXME: We could insert for VGPRs if we could replace the original compare
1704 // with a vector one.
1705 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1706 const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
1707 assert(MRI.getRegClass(FalseReg) == RC);
1708
1709 int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
1710
1711 // Sizes that are a multiple of 64 bits can use s_cselect_b64.
1712 if (NumInsts % 2 == 0)
1713 NumInsts /= 2;
1714
1715 CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
1716 return RI.isSGPRClass(RC);
1717 }
1718 default:
1719 return false;
1720 }
1721}
1722
1723void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
1724 MachineBasicBlock::iterator I, const DebugLoc &DL,
1725 unsigned DstReg, ArrayRef<MachineOperand> Cond,
1726 unsigned TrueReg, unsigned FalseReg) const {
1727 BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
1728 if (Pred == VCCZ || Pred == SCC_FALSE) {
1729 Pred = static_cast<BranchPredicate>(-Pred);
1730 std::swap(TrueReg, FalseReg);
1731 }
1732
1733 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1734 const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00001735 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001736
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00001737 if (DstSize == 32) {
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001738 unsigned SelOp = Pred == SCC_TRUE ?
1739 AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;
1740
1741 // Instruction's operands are backwards from what is expected.
1742 MachineInstr *Select =
1743 BuildMI(MBB, I, DL, get(SelOp), DstReg)
1744 .addReg(FalseReg)
1745 .addReg(TrueReg);
1746
1747 preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1748 return;
1749 }
1750
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00001751 if (DstSize == 64 && Pred == SCC_TRUE) {
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001752 MachineInstr *Select =
1753 BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
1754 .addReg(FalseReg)
1755 .addReg(TrueReg);
1756
1757 preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1758 return;
1759 }
1760
1761 static const int16_t Sub0_15[] = {
1762 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1763 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1764 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1765 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1766 };
1767
1768 static const int16_t Sub0_15_64[] = {
1769 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1770 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1771 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1772 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
1773 };
1774
1775 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
1776 const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass;
1777 const int16_t *SubIndices = Sub0_15;
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00001778 int NElts = DstSize / 32;
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001779
1780 // 64-bit select is only available for SALU.
1781 if (Pred == SCC_TRUE) {
1782 SelOp = AMDGPU::S_CSELECT_B64;
1783 EltRC = &AMDGPU::SGPR_64RegClass;
1784 SubIndices = Sub0_15_64;
1785
1786 assert(NElts % 2 == 0);
1787 NElts /= 2;
1788 }
1789
1790 MachineInstrBuilder MIB = BuildMI(
1791 MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
1792
1793 I = MIB->getIterator();
1794
1795 SmallVector<unsigned, 8> Regs;
1796 for (int Idx = 0; Idx != NElts; ++Idx) {
1797 unsigned DstElt = MRI.createVirtualRegister(EltRC);
1798 Regs.push_back(DstElt);
1799
1800 unsigned SubIdx = SubIndices[Idx];
1801
1802 MachineInstr *Select =
1803 BuildMI(MBB, I, DL, get(SelOp), DstElt)
1804 .addReg(FalseReg, 0, SubIdx)
1805 .addReg(TrueReg, 0, SubIdx);
1806 preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1807
1808 MIB.addReg(DstElt)
1809 .addImm(SubIdx);
1810 }
1811}
1812
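// Return true if MI is a simple move (or materialization of an immediate)
// whose result can, in principle, be propagated into its uses.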
Sam Kolton27e0f8b2017-03-31 11:42:43 +00001813bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
1814 switch (MI.getOpcode()) {
1815 case AMDGPU::V_MOV_B32_e32:
1816 case AMDGPU::V_MOV_B32_e64:
1817 case AMDGPU::V_MOV_B64_PSEUDO: {
1818 // If there are additional implicit register operands, this may be used for
1819 // register indexing so the source register operand isn't simply copied.
1820 unsigned NumOps = MI.getDesc().getNumOperands() +
1821 MI.getDesc().getNumImplicitUses();
1822
1823 return MI.getNumOperands() == NumOps;
1824 }
1825 case AMDGPU::S_MOV_B32:
1826 case AMDGPU::S_MOV_B64:
1827 case AMDGPU::COPY:
1828 return true;
1829 default:
1830 return false;
1831 }
1832}
1833
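// Strip the src0/src1/src2 modifier operands so the instruction can be
// rewritten to a form without source modifiers (e.g. madmk/madak).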
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001834static void removeModOperands(MachineInstr &MI) {
1835 unsigned Opc = MI.getOpcode();
1836 int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1837 AMDGPU::OpName::src0_modifiers);
1838 int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1839 AMDGPU::OpName::src1_modifiers);
1840 int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1841 AMDGPU::OpName::src2_modifiers);
1842
1843 MI.RemoveOperand(Src2ModIdx);
1844 MI.RemoveOperand(Src1ModIdx);
1845 MI.RemoveOperand(Src0ModIdx);
1846}
1847
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001848bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001849 unsigned Reg, MachineRegisterInfo *MRI) const {
1850 if (!MRI->hasOneNonDBGUse(Reg))
1851 return false;
1852
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001853 unsigned Opc = UseMI.getOpcode();
Tom Stellard2add8a12016-09-06 20:00:26 +00001854 if (Opc == AMDGPU::COPY) {
1855 bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
1856 switch (DefMI.getOpcode()) {
1857 default:
1858 return false;
1859 case AMDGPU::S_MOV_B64:
1860 // TODO: We could fold 64-bit immediates, but this gets complicated
1861 // when there are sub-registers.
1862 return false;
1863
1864 case AMDGPU::V_MOV_B32_e32:
1865 case AMDGPU::S_MOV_B32:
1866 break;
1867 }
1868 unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
1869 const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
1870 assert(ImmOp);
1871 // FIXME: We could handle FrameIndex values here.
1872 if (!ImmOp->isImm()) {
1873 return false;
1874 }
1875 UseMI.setDesc(get(NewOpc));
1876 UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
1877 UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
1878 return true;
1879 }
1880
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00001881 if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
1882 Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
Matt Arsenault2ed21932017-02-27 20:21:31 +00001883 // Don't fold if we are using source or output modifiers. The new VOP2
1884 // instructions don't have them.
1885 if (hasAnyModifiersSet(UseMI))
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001886 return false;
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001887
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001888 const MachineOperand &ImmOp = DefMI.getOperand(1);
Matt Arsenault3d1c1de2016-04-14 21:58:24 +00001889
1890 // If this is a free constant, there's no reason to do this.
1891 // TODO: We could fold this here instead of letting SIFoldOperands do it
1892 // later.
Matt Arsenault4bd72362016-12-10 00:39:12 +00001893 MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);
1894
1895 // Any src operand can be used for the legality check.
1896 if (isInlineConstant(UseMI, *Src0, ImmOp))
Matt Arsenault3d1c1de2016-04-14 21:58:24 +00001897 return false;
1898
Matt Arsenault2ed21932017-02-27 20:21:31 +00001899 bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001900 MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
1901 MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001902
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00001903 // Multiplied part is the constant: Use v_madmk_{f16, f32}.
Matt Arsenaultf0783302015-02-21 21:29:10 +00001904 // We should only expect these to be on src0 due to canonicalizations.
1905 if (Src0->isReg() && Src0->getReg() == Reg) {
Matt Arsenaulta266bd82016-03-02 04:05:14 +00001906 if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
Matt Arsenaultf0783302015-02-21 21:29:10 +00001907 return false;
1908
Matt Arsenaulta266bd82016-03-02 04:05:14 +00001909 if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
Matt Arsenaultf0783302015-02-21 21:29:10 +00001910 return false;
1911
Nikolay Haustov65607812016-03-11 09:27:25 +00001912 // We need to swap operands 0 and 1 since madmk constant is at operand 1.
Matt Arsenaultf0783302015-02-21 21:29:10 +00001913
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001914 const int64_t Imm = DefMI.getOperand(1).getImm();
Matt Arsenaultf0783302015-02-21 21:29:10 +00001915
1916 // FIXME: This would be a lot easier if we could return a new instruction
1917 // instead of having to modify in place.
1918
1919 // Remove these first since they are at the end.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001920 UseMI.RemoveOperand(
1921 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
1922 UseMI.RemoveOperand(
1923 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
Matt Arsenaultf0783302015-02-21 21:29:10 +00001924
1925 unsigned Src1Reg = Src1->getReg();
1926 unsigned Src1SubReg = Src1->getSubReg();
Matt Arsenaultf0783302015-02-21 21:29:10 +00001927 Src0->setReg(Src1Reg);
1928 Src0->setSubReg(Src1SubReg);
Matt Arsenault5e100162015-04-24 01:57:58 +00001929 Src0->setIsKill(Src1->isKill());
1930
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00001931 if (Opc == AMDGPU::V_MAC_F32_e64 ||
1932 Opc == AMDGPU::V_MAC_F16_e64)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001933 UseMI.untieRegOperand(
1934 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
Tom Stellarddb5a11f2015-07-13 15:47:57 +00001935
Nikolay Haustov65607812016-03-11 09:27:25 +00001936 Src1->ChangeToImmediate(Imm);
Matt Arsenaultf0783302015-02-21 21:29:10 +00001937
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001938 removeModOperands(UseMI);
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00001939 UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16));
Matt Arsenaultf0783302015-02-21 21:29:10 +00001940
1941 bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
1942 if (DeleteDef)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001943 DefMI.eraseFromParent();
Matt Arsenaultf0783302015-02-21 21:29:10 +00001944
1945 return true;
1946 }
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001947
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00001948 // Added part is the constant: Use v_madak_{f16, f32}.
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001949 if (Src2->isReg() && Src2->getReg() == Reg) {
1950 // Not allowed to use constant bus for another operand.
1951 // We can however allow an inline immediate as src0.
1952 if (!Src0->isImm() &&
1953 (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
1954 return false;
1955
Matt Arsenaulta266bd82016-03-02 04:05:14 +00001956 if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001957 return false;
1958
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001959 const int64_t Imm = DefMI.getOperand(1).getImm();
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001960
1961 // FIXME: This would be a lot easier if we could return a new instruction
1962 // instead of having to modify in place.
1963
1964 // Remove these first since they are at the end.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001965 UseMI.RemoveOperand(
1966 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
1967 UseMI.RemoveOperand(
1968 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001969
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00001970 if (Opc == AMDGPU::V_MAC_F32_e64 ||
1971 Opc == AMDGPU::V_MAC_F16_e64)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001972 UseMI.untieRegOperand(
1973 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
Tom Stellarddb5a11f2015-07-13 15:47:57 +00001974
1975 // ChangeToImmediate adds Src2 back to the instruction.
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001976 Src2->ChangeToImmediate(Imm);
1977
1978 // These come before src2.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001979 removeModOperands(UseMI);
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00001980 UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16));
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001981
1982 bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
1983 if (DeleteDef)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001984 DefMI.eraseFromParent();
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001985
1986 return true;
1987 }
1988 }
1989
1990 return false;
1991}
1992
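// Return true if the two memory ranges [OffsetA, OffsetA + WidthA) and
// [OffsetB, OffsetB + WidthB) do not overlap.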
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00001993static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
1994 int WidthB, int OffsetB) {
1995 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1996 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1997 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1998 return LowOffset + LowWidth <= HighOffset;
1999}
2000
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002001bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
2002 MachineInstr &MIb) const {
Chad Rosierc27a18f2016-03-09 16:00:35 +00002003 unsigned BaseReg0, BaseReg1;
2004 int64_t Offset0, Offset1;
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002005
Sanjoy Dasb666ea32015-06-15 18:44:14 +00002006 if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
2007 getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
Tom Stellardcb6ba622016-04-30 00:23:06 +00002008
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002009 if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) {
Tom Stellardcb6ba622016-04-30 00:23:06 +00002010 // FIXME: Handle ds_read2 / ds_write2.
2011 return false;
2012 }
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002013 unsigned Width0 = (*MIa.memoperands_begin())->getSize();
2014 unsigned Width1 = (*MIb.memoperands_begin())->getSize();
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002015 if (BaseReg0 == BaseReg1 &&
2016 offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
2017 return true;
2018 }
2019 }
2020
2021 return false;
2022}
2023
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002024bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa,
2025 MachineInstr &MIb,
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002026 AliasAnalysis *AA) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002027 assert((MIa.mayLoad() || MIa.mayStore()) &&
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002028 "MIa must load from or modify a memory location");
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002029 assert((MIb.mayLoad() || MIb.mayStore()) &&
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002030 "MIb must load from or modify a memory location");
2031
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002032 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects())
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002033 return false;
2034
2035 // XXX - Can we relax this between address spaces?
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002036 if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002037 return false;
2038
Tom Stellard662f3302016-08-29 12:05:32 +00002039 if (AA && MIa.hasOneMemOperand() && MIb.hasOneMemOperand()) {
2040 const MachineMemOperand *MMOa = *MIa.memoperands_begin();
2041 const MachineMemOperand *MMOb = *MIb.memoperands_begin();
2042 if (MMOa->getValue() && MMOb->getValue()) {
2043 MemoryLocation LocA(MMOa->getValue(), MMOa->getSize(), MMOa->getAAInfo());
2044 MemoryLocation LocB(MMOb->getValue(), MMOb->getSize(), MMOb->getAAInfo());
2045 if (!AA->alias(LocA, LocB))
2046 return true;
2047 }
2048 }
2049
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002050 // TODO: Should we check the address space from the MachineMemOperand? That
2051 // would allow us to distinguish objects we know don't alias based on the
Benjamin Kramerdf005cb2015-08-08 18:27:36 +00002052 // underlying address space, even if it was lowered to a different one,
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002053 // e.g. private accesses lowered to use MUBUF instructions on a scratch
2054 // buffer.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002055 if (isDS(MIa)) {
2056 if (isDS(MIb))
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002057 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2058
Matt Arsenault9608a2892017-07-29 01:26:21 +00002059 return !isFLAT(MIb) || isSegmentSpecificFLAT(MIb);
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002060 }
2061
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002062 if (isMUBUF(MIa) || isMTBUF(MIa)) {
2063 if (isMUBUF(MIb) || isMTBUF(MIb))
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002064 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2065
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002066 return !isFLAT(MIb) && !isSMRD(MIb);
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002067 }
2068
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002069 if (isSMRD(MIa)) {
2070 if (isSMRD(MIb))
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002071 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2072
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002073 return !isFLAT(MIb) && !isMUBUF(MIb) && !isMTBUF(MIb);
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002074 }
2075
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002076 if (isFLAT(MIa)) {
2077 if (isFLAT(MIb))
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002078 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2079
2080 return false;
2081 }
2082
2083 return false;
2084}
2085
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002086MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002087 MachineInstr &MI,
2088 LiveVariables *LV) const {
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002089 bool IsF16 = false;
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002090
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002091 switch (MI.getOpcode()) {
2092 default:
2093 return nullptr;
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002094 case AMDGPU::V_MAC_F16_e64:
2095 IsF16 = true;
Simon Pilgrim0f5b3502017-07-07 10:18:57 +00002096 LLVM_FALLTHROUGH;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002097 case AMDGPU::V_MAC_F32_e64:
2098 break;
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002099 case AMDGPU::V_MAC_F16_e32:
2100 IsF16 = true;
Simon Pilgrim0f5b3502017-07-07 10:18:57 +00002101 LLVM_FALLTHROUGH;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002102 case AMDGPU::V_MAC_F32_e32: {
Matt Arsenault4bd72362016-12-10 00:39:12 +00002103 int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
2104 AMDGPU::OpName::src0);
2105 const MachineOperand *Src0 = &MI.getOperand(Src0Idx);
2106 if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002107 return nullptr;
2108 break;
2109 }
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002110 }
2111
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002112 const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
2113 const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002114 const MachineOperand *Src0Mods =
2115 getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002116 const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002117 const MachineOperand *Src1Mods =
2118 getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002119 const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002120 const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
2121 const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002122
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002123 return BuildMI(*MBB, MI, MI.getDebugLoc(),
2124 get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
Diana Picus116bbab2017-01-13 09:58:52 +00002125 .add(*Dst)
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002126 .addImm(Src0Mods ? Src0Mods->getImm() : 0)
Diana Picus116bbab2017-01-13 09:58:52 +00002127 .add(*Src0)
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002128 .addImm(Src1Mods ? Src1Mods->getImm() : 0)
Diana Picus116bbab2017-01-13 09:58:52 +00002129 .add(*Src1)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002130 .addImm(0) // Src2 mods
Diana Picus116bbab2017-01-13 09:58:52 +00002131 .add(*Src2)
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002132 .addImm(Clamp ? Clamp->getImm() : 0)
2133 .addImm(Omod ? Omod->getImm() : 0);
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002134}
2135
Matt Arsenaultd486d3f2016-10-12 18:49:05 +00002136// It's not generally safe to move VALU instructions across these since it will
2137// start using the register as a base index rather than directly.
2138// XXX - Why isn't hasSideEffects sufficient for these?
2139static bool changesVGPRIndexingMode(const MachineInstr &MI) {
2140 switch (MI.getOpcode()) {
2141 case AMDGPU::S_SET_GPR_IDX_ON:
2142 case AMDGPU::S_SET_GPR_IDX_MODE:
2143 case AMDGPU::S_SET_GPR_IDX_OFF:
2144 return true;
2145 default:
2146 return false;
2147 }
2148}
2149
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002150bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
Nicolai Haehnle213e87f2016-03-21 20:28:33 +00002151 const MachineBasicBlock *MBB,
2152 const MachineFunction &MF) const {
Matt Arsenault95c78972016-07-09 01:13:51 +00002153 // XXX - Do we want the SP check in the base implementation?
2154
Nicolai Haehnle213e87f2016-03-21 20:28:33 +00002155 // Target-independent instructions do not have an implicit-use of EXEC, even
2156 // when they operate on VGPRs. Treating EXEC modifications as scheduling
2157 // boundaries prevents incorrect movements of such instructions.
Matt Arsenault95c78972016-07-09 01:13:51 +00002158 return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) ||
Matt Arsenaultd486d3f2016-10-12 18:49:05 +00002159 MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
Tom Stellard8485fa02016-12-07 02:42:15 +00002160 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
2161 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
Matt Arsenaultd486d3f2016-10-12 18:49:05 +00002162 changesVGPRIndexingMode(MI);
Nicolai Haehnle213e87f2016-03-21 20:28:33 +00002163}
2164
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00002165bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
Matt Arsenault26faed32016-12-05 22:26:17 +00002166 switch (Imm.getBitWidth()) {
2167 case 32:
2168 return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(),
2169 ST.hasInv2PiInlineImm());
2170 case 64:
2171 return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(),
2172 ST.hasInv2PiInlineImm());
Matt Arsenault4bd72362016-12-10 00:39:12 +00002173 case 16:
Matt Arsenault9dba9bd2017-02-02 02:27:04 +00002174 return ST.has16BitInsts() &&
2175 AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
Matt Arsenault4bd72362016-12-10 00:39:12 +00002176 ST.hasInv2PiInlineImm());
Matt Arsenault26faed32016-12-05 22:26:17 +00002177 default:
2178 llvm_unreachable("invalid bitwidth");
Matt Arsenault303011a2014-12-17 21:04:08 +00002179 }
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00002180}
2181
Matt Arsenault11a4d672015-02-13 19:05:03 +00002182bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
Matt Arsenault4bd72362016-12-10 00:39:12 +00002183 uint8_t OperandType) const {
Sam Kolton549c89d2017-06-21 08:53:38 +00002184 if (!MO.isImm() ||
2185 OperandType < AMDGPU::OPERAND_SRC_FIRST ||
2186 OperandType > AMDGPU::OPERAND_SRC_LAST)
Matt Arsenault4bd72362016-12-10 00:39:12 +00002187 return false;
2188
2189 // MachineOperand provides no way to tell the true operand size, since it only
2190 // records a 64-bit value. We need to know the size to determine if a 32-bit
2191 // floating point immediate bit pattern is legal for an integer immediate. It
2192 // would be for any 32-bit integer operand, but would not be for a 64-bit one.
2193
2194 int64_t Imm = MO.getImm();
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002195 switch (OperandType) {
2196 case AMDGPU::OPERAND_REG_IMM_INT32:
2197 case AMDGPU::OPERAND_REG_IMM_FP32:
2198 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2199 case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
Matt Arsenault4bd72362016-12-10 00:39:12 +00002200 int32_t Trunc = static_cast<int32_t>(Imm);
2201 return Trunc == Imm &&
2202 AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
Matt Arsenault11a4d672015-02-13 19:05:03 +00002203 }
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002204 case AMDGPU::OPERAND_REG_IMM_INT64:
2205 case AMDGPU::OPERAND_REG_IMM_FP64:
2206 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
Eugene Zelenko59e12822017-08-08 00:47:13 +00002207 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
Matt Arsenault4bd72362016-12-10 00:39:12 +00002208 return AMDGPU::isInlinableLiteral64(MO.getImm(),
2209 ST.hasInv2PiInlineImm());
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002210 case AMDGPU::OPERAND_REG_IMM_INT16:
2211 case AMDGPU::OPERAND_REG_IMM_FP16:
2212 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2213 case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
Matt Arsenault4bd72362016-12-10 00:39:12 +00002214 if (isInt<16>(Imm) || isUInt<16>(Imm)) {
Matt Arsenault9dba9bd2017-02-02 02:27:04 +00002215 // A few special case instructions have 16-bit operands on subtargets
2216 // where 16-bit instructions are not legal.
2217 // TODO: Do the 32-bit immediates work? We shouldn't really need to handle
2218 // constants in these cases
Matt Arsenault4bd72362016-12-10 00:39:12 +00002219 int16_t Trunc = static_cast<int16_t>(Imm);
Matt Arsenault9dba9bd2017-02-02 02:27:04 +00002220 return ST.has16BitInsts() &&
2221 AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
Matt Arsenault4bd72362016-12-10 00:39:12 +00002222 }
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00002223
Matt Arsenault4bd72362016-12-10 00:39:12 +00002224 return false;
2225 }
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002226 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2227 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
2228 uint32_t Trunc = static_cast<uint32_t>(Imm);
2229 return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
2230 }
Matt Arsenault4bd72362016-12-10 00:39:12 +00002231 default:
2232 llvm_unreachable("invalid bitwidth");
2233 }
Tom Stellard93fabce2013-10-10 17:11:55 +00002234}
2235
Matt Arsenaultc1ebd822016-08-13 01:43:54 +00002236bool SIInstrInfo::isLiteralConstantLike(const MachineOperand &MO,
Matt Arsenault4bd72362016-12-10 00:39:12 +00002237 const MCOperandInfo &OpInfo) const {
Matt Arsenaultc1ebd822016-08-13 01:43:54 +00002238 switch (MO.getType()) {
2239 case MachineOperand::MO_Register:
2240 return false;
2241 case MachineOperand::MO_Immediate:
Matt Arsenault4bd72362016-12-10 00:39:12 +00002242 return !isInlineConstant(MO, OpInfo);
Matt Arsenaultc1ebd822016-08-13 01:43:54 +00002243 case MachineOperand::MO_FrameIndex:
2244 case MachineOperand::MO_MachineBasicBlock:
2245 case MachineOperand::MO_ExternalSymbol:
2246 case MachineOperand::MO_GlobalAddress:
2247 case MachineOperand::MO_MCSymbol:
2248 return true;
2249 default:
2250 llvm_unreachable("unexpected operand type");
2251 }
2252}
2253
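// Return true if two machine operands are identical; only register and
// immediate operands are expected here.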
Matt Arsenaultbecb1402014-06-23 18:28:31 +00002254static bool compareMachineOp(const MachineOperand &Op0,
2255 const MachineOperand &Op1) {
2256 if (Op0.getType() != Op1.getType())
2257 return false;
2258
2259 switch (Op0.getType()) {
2260 case MachineOperand::MO_Register:
2261 return Op0.getReg() == Op1.getReg();
2262 case MachineOperand::MO_Immediate:
2263 return Op0.getImm() == Op1.getImm();
Matt Arsenaultbecb1402014-06-23 18:28:31 +00002264 default:
2265 llvm_unreachable("Didn't expect to be comparing these operand types");
2266 }
2267}
2268
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002269bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
2270 const MachineOperand &MO) const {
2271 const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo];
Tom Stellardb02094e2014-07-21 15:45:01 +00002272
Tom Stellardfb77f002015-01-13 22:59:41 +00002273 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
Tom Stellardb02094e2014-07-21 15:45:01 +00002274
2275 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
2276 return true;
2277
2278 if (OpInfo.RegClass < 0)
2279 return false;
2280
Matt Arsenault4bd72362016-12-10 00:39:12 +00002281 if (MO.isImm() && isInlineConstant(MO, OpInfo))
2282 return RI.opCanUseInlineConstant(OpInfo.OperandType);
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002283
Matt Arsenault4bd72362016-12-10 00:39:12 +00002284 return RI.opCanUseLiteralConstant(OpInfo.OperandType);
Tom Stellardb02094e2014-07-21 15:45:01 +00002285}
2286
Tom Stellard86d12eb2014-08-01 00:32:28 +00002287bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
Marek Olsaka93603d2015-01-15 18:42:51 +00002288 int Op32 = AMDGPU::getVOPe32(Opcode);
2289 if (Op32 == -1)
2290 return false;
2291
2292 return pseudoToMCOpcode(Op32) != -1;
Tom Stellard86d12eb2014-08-01 00:32:28 +00002293}
2294
Tom Stellardb4a313a2014-08-01 00:32:39 +00002295bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
2296 // The src0_modifier operand is present on all instructions
2297 // that have modifiers.
2298
2299 return AMDGPU::getNamedOperandIdx(Opcode,
2300 AMDGPU::OpName::src0_modifiers) != -1;
2301}
2302
Matt Arsenaultace5b762014-10-17 18:00:43 +00002303bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
2304 unsigned OpName) const {
2305 const MachineOperand *Mods = getNamedOperand(MI, OpName);
2306 return Mods && Mods->getImm();
2307}
2308
Matt Arsenault2ed21932017-02-27 20:21:31 +00002309bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
2310 return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
2311 hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
2312 hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) ||
2313 hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
2314 hasModifiersSet(MI, AMDGPU::OpName::omod);
2315}
2316
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002317bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
Matt Arsenault11a4d672015-02-13 19:05:03 +00002318 const MachineOperand &MO,
Matt Arsenault4bd72362016-12-10 00:39:12 +00002319 const MCOperandInfo &OpInfo) const {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002320 // Literal constants use the constant bus.
Matt Arsenault4bd72362016-12-10 00:39:12 +00002321 //if (isLiteralConstantLike(MO, OpInfo))
2322 // return true;
2323 if (MO.isImm())
2324 return !isInlineConstant(MO, OpInfo);
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002325
Matt Arsenault4bd72362016-12-10 00:39:12 +00002326 if (!MO.isReg())
2327 return true; // Misc other operands like FrameIndex
2328
2329 if (!MO.isUse())
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002330 return false;
2331
2332 if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
2333 return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
2334
2335 // FLAT_SCR is just an SGPR pair.
2336 if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
2337 return true;
2338
2339 // EXEC register uses the constant bus.
2340 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
2341 return true;
2342
2343 // SGPRs use the constant bus
Matt Arsenault8226fc42016-03-02 23:00:21 +00002344 return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
2345 (!MO.isImplicit() &&
2346 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
2347 AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002348}
2349
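// Return the special SGPR (VCC, M0 or FLAT_SCR) implicitly read by MI, or
// AMDGPU::NoRegister if there is none.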
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00002350static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
2351 for (const MachineOperand &MO : MI.implicit_operands()) {
2352 // We only care about reads.
2353 if (MO.isDef())
2354 continue;
2355
2356 switch (MO.getReg()) {
2357 case AMDGPU::VCC:
2358 case AMDGPU::M0:
2359 case AMDGPU::FLAT_SCR:
2360 return MO.getReg();
2361
2362 default:
2363 break;
2364 }
2365 }
2366
2367 return AMDGPU::NoRegister;
2368}
2369
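// Return true if MI is expected to carry an implicit read of EXEC. Most VALU
// and memory instructions do; SALU, SMRD, generic opcodes and the
// readlane/writelane family do not.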
Matt Arsenault529cf252016-06-23 01:26:16 +00002370static bool shouldReadExec(const MachineInstr &MI) {
2371 if (SIInstrInfo::isVALU(MI)) {
2372 switch (MI.getOpcode()) {
2373 case AMDGPU::V_READLANE_B32:
2374 case AMDGPU::V_READLANE_B32_si:
2375 case AMDGPU::V_READLANE_B32_vi:
2376 case AMDGPU::V_WRITELANE_B32:
2377 case AMDGPU::V_WRITELANE_B32_si:
2378 case AMDGPU::V_WRITELANE_B32_vi:
2379 return false;
2380 }
2381
2382 return true;
2383 }
2384
2385 if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
2386 SIInstrInfo::isSALU(MI) ||
2387 SIInstrInfo::isSMRD(MI))
2388 return false;
2389
2390 return true;
2391}
2392
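// Return true if the operand SubReg refers to a sub-register of the vector
// register operand SuperVec.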
Matt Arsenaultcb540bc2016-07-19 00:35:03 +00002393static bool isSubRegOf(const SIRegisterInfo &TRI,
2394 const MachineOperand &SuperVec,
2395 const MachineOperand &SubReg) {
2396 if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg()))
2397 return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());
2398
2399 return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
2400 SubReg.getReg() == SuperVec.getReg();
2401}
2402
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002403bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
Tom Stellard93fabce2013-10-10 17:11:55 +00002404 StringRef &ErrInfo) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002405 uint16_t Opcode = MI.getOpcode();
Tom Stellarddde28a82017-05-26 16:40:03 +00002406 if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
2407 return true;
2408
Matt Arsenault89ad17c2017-06-12 16:37:55 +00002409 const MachineFunction *MF = MI.getParent()->getParent();
2410 const MachineRegisterInfo &MRI = MF->getRegInfo();
2411
Tom Stellard93fabce2013-10-10 17:11:55 +00002412 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2413 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2414 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2415
Tom Stellardca700e42014-03-17 17:03:49 +00002416 // Make sure the number of operands is correct.
2417 const MCInstrDesc &Desc = get(Opcode);
2418 if (!Desc.isVariadic() &&
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002419 Desc.getNumOperands() != MI.getNumExplicitOperands()) {
2420 ErrInfo = "Instruction has wrong number of operands.";
2421 return false;
Tom Stellardca700e42014-03-17 17:03:49 +00002422 }
2423
Matt Arsenault3d463192016-11-01 22:55:07 +00002424 if (MI.isInlineAsm()) {
2425 // Verify register classes for inlineasm constraints.
2426 for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands();
2427 I != E; ++I) {
2428 const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI);
2429 if (!RC)
2430 continue;
2431
2432 const MachineOperand &Op = MI.getOperand(I);
2433 if (!Op.isReg())
2434 continue;
2435
2436 unsigned Reg = Op.getReg();
2437 if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
2438 ErrInfo = "inlineasm operand has incorrect register class.";
2439 return false;
2440 }
2441 }
2442
2443 return true;
2444 }
2445
Changpeng Fangc9963932015-12-18 20:04:28 +00002446 // Make sure the register classes are correct.
Tom Stellardb4a313a2014-08-01 00:32:39 +00002447 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002448 if (MI.getOperand(i).isFPImm()) {
Tom Stellardfb77f002015-01-13 22:59:41 +00002449 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
2450 "all fp values to integers.";
2451 return false;
2452 }
2453
Marek Olsak8eeebcc2015-02-18 22:12:41 +00002454 int RegClass = Desc.OpInfo[i].RegClass;
2455
Tom Stellardca700e42014-03-17 17:03:49 +00002456 switch (Desc.OpInfo[i].OperandType) {
Tom Stellard1106b1c2015-01-20 17:49:41 +00002457 case MCOI::OPERAND_REGISTER:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002458 if (MI.getOperand(i).isImm()) {
Tom Stellard1106b1c2015-01-20 17:49:41 +00002459 ErrInfo = "Illegal immediate value for operand.";
2460 return false;
2461 }
2462 break;
Matt Arsenault4bd72362016-12-10 00:39:12 +00002463 case AMDGPU::OPERAND_REG_IMM_INT32:
2464 case AMDGPU::OPERAND_REG_IMM_FP32:
Tom Stellard1106b1c2015-01-20 17:49:41 +00002465 break;
Matt Arsenault4bd72362016-12-10 00:39:12 +00002466 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2467 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2468 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2469 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2470 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2471 case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
2472 const MachineOperand &MO = MI.getOperand(i);
2473 if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) {
Marek Olsak8eeebcc2015-02-18 22:12:41 +00002474 ErrInfo = "Illegal immediate value for operand.";
2475 return false;
Tom Stellarda305f932014-07-02 20:53:44 +00002476 }
Tom Stellardca700e42014-03-17 17:03:49 +00002477 break;
Matt Arsenault4bd72362016-12-10 00:39:12 +00002478 }
Tom Stellardca700e42014-03-17 17:03:49 +00002479 case MCOI::OPERAND_IMMEDIATE:
Matt Arsenaultffc82752016-07-05 17:09:01 +00002480 case AMDGPU::OPERAND_KIMM32:
Tom Stellardb02094e2014-07-21 15:45:01 +00002481 // Check if this operand is an immediate.
2482 // FrameIndex operands will be replaced by immediates, so they are
2483 // allowed.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002484 if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
Tom Stellardca700e42014-03-17 17:03:49 +00002485 ErrInfo = "Expected immediate, but got non-immediate";
2486 return false;
2487 }
Justin Bognerb03fd122016-08-17 05:10:15 +00002488 LLVM_FALLTHROUGH;
Tom Stellardca700e42014-03-17 17:03:49 +00002489 default:
2490 continue;
2491 }
2492
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002493 if (!MI.getOperand(i).isReg())
Tom Stellardca700e42014-03-17 17:03:49 +00002494 continue;
2495
Tom Stellardca700e42014-03-17 17:03:49 +00002496 if (RegClass != -1) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002497 unsigned Reg = MI.getOperand(i).getReg();
Matt Arsenault1322b6f2016-07-09 01:13:56 +00002498 if (Reg == AMDGPU::NoRegister ||
2499 TargetRegisterInfo::isVirtualRegister(Reg))
Tom Stellardca700e42014-03-17 17:03:49 +00002500 continue;
2501
2502 const TargetRegisterClass *RC = RI.getRegClass(RegClass);
2503 if (!RC->contains(Reg)) {
2504 ErrInfo = "Operand has incorrect register class.";
2505 return false;
2506 }
2507 }
2508 }
2509
Sam Kolton549c89d2017-06-21 08:53:38 +00002510 // Verify SDWA
2511 if (isSDWA(MI)) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002512 if (!ST.hasSDWA()) {
2513 ErrInfo = "SDWA is not supported on this target";
2514 return false;
2515 }
2516
2517 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
Sam Kolton549c89d2017-06-21 08:53:38 +00002518
2519 const int OpIndices[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
2520
2521 for (int OpIdx : OpIndices) {
2522 if (OpIdx == -1)
2523 continue;
2524 const MachineOperand &MO = MI.getOperand(OpIdx);
2525
Sam Kolton3c4933f2017-06-22 06:26:41 +00002526 if (!ST.hasSDWAScalar()) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002527 // Only VGPRs on VI
2528 if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
2529 ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
2530 return false;
2531 }
2532 } else {
2533 // No immediates on GFX9
2534 if (!MO.isReg()) {
2535 ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9";
2536 return false;
2537 }
2538 }
2539 }
2540
Sam Kolton3c4933f2017-06-22 06:26:41 +00002541 if (!ST.hasSDWAOmod()) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002542 // No omod allowed on VI
2543 const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
2544 if (OMod != nullptr &&
2545 (!OMod->isImm() || OMod->getImm() != 0)) {
2546 ErrInfo = "OMod not allowed in SDWA instructions on VI";
2547 return false;
2548 }
2549 }
2550
2551 uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode);
2552 if (isVOPC(BasicOpcode)) {
Sam Kolton3c4933f2017-06-22 06:26:41 +00002553 if (!ST.hasSDWASdst() && DstIdx != -1) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002554 // Only vcc allowed as dst on VI for VOPC
2555 const MachineOperand &Dst = MI.getOperand(DstIdx);
2556 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
2557 ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
2558 return false;
2559 }
Sam Koltona179d252017-06-27 15:02:23 +00002560 } else if (!ST.hasSDWAOutModsVOPC()) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002561 // No clamp allowed on GFX9 for VOPC
2562 const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
Sam Koltona179d252017-06-27 15:02:23 +00002563 if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002564 ErrInfo = "Clamp not allowed in VOPC SDWA instructions on GFX9";
2565 return false;
2566 }
Sam Koltona179d252017-06-27 15:02:23 +00002567
2568 // No omod allowed on GFX9 for VOPC
2569 const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
2570 if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
2571 ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
2572 return false;
2573 }
Sam Kolton549c89d2017-06-21 08:53:38 +00002574 }
2575 }
2576 }
2577
Tom Stellard93fabce2013-10-10 17:11:55 +00002578 // Verify VOP*
Sam Kolton549c89d2017-06-21 08:53:38 +00002579 if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI)) {
Matt Arsenaulte368cb32014-12-11 23:37:32 +00002580 // Only look at the true operands. Only a real operand can use the constant
2581 // bus, and we don't want to check pseudo-operands like the source modifier
2582 // flags.
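// For example, a VOP2 instruction may use at most one SGPR or one literal
// constant across src0/src1; a second such operand would first have to be
// copied into a VGPR, which is exactly what the count below enforces.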
2583 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2584
Tom Stellard93fabce2013-10-10 17:11:55 +00002585 unsigned ConstantBusCount = 0;
Matt Arsenaultffc82752016-07-05 17:09:01 +00002586
2587 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
2588 ++ConstantBusCount;
2589
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002590 unsigned SGPRUsed = findImplicitSGPRRead(MI);
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00002591 if (SGPRUsed != AMDGPU::NoRegister)
2592 ++ConstantBusCount;
2593
Matt Arsenaulte368cb32014-12-11 23:37:32 +00002594 for (int OpIdx : OpIndices) {
2595 if (OpIdx == -1)
2596 break;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002597 const MachineOperand &MO = MI.getOperand(OpIdx);
Matt Arsenault4bd72362016-12-10 00:39:12 +00002598 if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002599 if (MO.isReg()) {
2600 if (MO.getReg() != SGPRUsed)
Tom Stellard93fabce2013-10-10 17:11:55 +00002601 ++ConstantBusCount;
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002602 SGPRUsed = MO.getReg();
2603 } else {
2604 ++ConstantBusCount;
Tom Stellard93fabce2013-10-10 17:11:55 +00002605 }
2606 }
Tom Stellard93fabce2013-10-10 17:11:55 +00002607 }
2608 if (ConstantBusCount > 1) {
2609 ErrInfo = "VOP* instruction uses the constant bus more than once";
2610 return false;
2611 }
2612 }
2613
Matt Arsenaultbecb1402014-06-23 18:28:31 +00002614 // Verify misc. restrictions on specific instructions.
2615 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
2616 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002617 const MachineOperand &Src0 = MI.getOperand(Src0Idx);
2618 const MachineOperand &Src1 = MI.getOperand(Src1Idx);
2619 const MachineOperand &Src2 = MI.getOperand(Src2Idx);
Matt Arsenaultbecb1402014-06-23 18:28:31 +00002620 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
2621 if (!compareMachineOp(Src0, Src1) &&
2622 !compareMachineOp(Src0, Src2)) {
2623 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
2624 return false;
2625 }
2626 }
2627 }
2628
Matt Arsenault7ccf6cd2016-09-16 21:41:16 +00002629 if (isSOPK(MI)) {
2630 int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
2631 if (sopkIsZext(MI)) {
2632 if (!isUInt<16>(Imm)) {
2633 ErrInfo = "invalid immediate for SOPK instruction";
2634 return false;
2635 }
2636 } else {
2637 if (!isInt<16>(Imm)) {
2638 ErrInfo = "invalid immediate for SOPK instruction";
2639 return false;
2640 }
2641 }
2642 }
2643
Matt Arsenaultcb540bc2016-07-19 00:35:03 +00002644 if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
2645 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
2646 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
2647 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
2648 const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
2649 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
2650
2651 const unsigned StaticNumOps = Desc.getNumOperands() +
2652 Desc.getNumImplicitUses();
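// In addition to the implicit use of the indexed super-register, MOVRELD
// also carries an implicit def of it (tied to that use), so it has one more
// implicit operand than MOVRELS.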
2653 const unsigned NumImplicitOps = IsDst ? 2 : 1;
2654
Nicolai Haehnle368972c2016-11-02 17:03:11 +00002655 // Allow additional implicit operands. This allows a fixup done by the post
2656 // RA scheduler where the main implicit operand is killed and implicit-defs
2657 // are added for sub-registers that remain live after this instruction.
2658 if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
Matt Arsenaultcb540bc2016-07-19 00:35:03 +00002659 ErrInfo = "missing implicit register operands";
2660 return false;
2661 }
2662
2663 const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
2664 if (IsDst) {
2665 if (!Dst->isUse()) {
2666 ErrInfo = "v_movreld_b32 vdst should be a use operand";
2667 return false;
2668 }
2669
2670 unsigned UseOpIdx;
2671 if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
2672 UseOpIdx != StaticNumOps + 1) {
2673 ErrInfo = "movrel implicit operands should be tied";
2674 return false;
2675 }
2676 }
2677
2678 const MachineOperand &Src0 = MI.getOperand(Src0Idx);
2679 const MachineOperand &ImpUse
2680 = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
2681 if (!ImpUse.isReg() || !ImpUse.isUse() ||
2682 !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
2683 ErrInfo = "src0 should be subreg of implicit vector use";
2684 return false;
2685 }
2686 }
2687
Matt Arsenaultd092a062015-10-02 18:58:37 +00002688 // Make sure we aren't losing exec uses in the td files. This mostly requires
2689 // being careful when using let Uses to try to add other use registers.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002690 if (shouldReadExec(MI)) {
2691 if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
Matt Arsenaultd092a062015-10-02 18:58:37 +00002692 ErrInfo = "VALU instruction does not implicitly read exec mask";
2693 return false;
2694 }
2695 }
2696
Matt Arsenault7b647552016-10-28 21:55:15 +00002697 if (isSMRD(MI)) {
2698 if (MI.mayStore()) {
2699 // The register offset form of scalar stores may only use m0 as the
2700 // soffset register.
2701 const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff);
2702 if (Soff && Soff->getReg() != AMDGPU::M0) {
2703 ErrInfo = "scalar stores must use m0 as offset register";
2704 return false;
2705 }
2706 }
2707 }
2708
Matt Arsenault89ad17c2017-06-12 16:37:55 +00002709 if (isFLAT(MI) && !MF->getSubtarget<SISubtarget>().hasFlatInstOffsets()) {
2710 const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
2711 if (Offset->getImm() != 0) {
2712 ErrInfo = "subtarget does not support offsets in flat instructions";
2713 return false;
2714 }
2715 }
2716
Tom Stellard93fabce2013-10-10 17:11:55 +00002717 return true;
2718}
2719
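// Return the VALU opcode that should replace the given SALU instruction when
// it is moved to the VALU, or INSTRUCTION_LIST_END if there is no direct
// VALU equivalent.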
Matt Arsenaultf14032a2013-11-15 22:02:28 +00002720unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
Tom Stellard82166022013-11-13 23:36:37 +00002721 switch (MI.getOpcode()) {
2722 default: return AMDGPU::INSTRUCTION_LIST_END;
2723 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
2724 case AMDGPU::COPY: return AMDGPU::COPY;
2725 case AMDGPU::PHI: return AMDGPU::PHI;
Tom Stellard204e61b2014-04-07 19:45:45 +00002726 case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
Connor Abbott8c217d02017-08-04 18:36:49 +00002727 case AMDGPU::WQM: return AMDGPU::WQM;
Connor Abbott92638ab2017-08-04 18:36:52 +00002728 case AMDGPU::WWM: return AMDGPU::WWM;
Tom Stellarde0387202014-03-21 15:51:54 +00002729 case AMDGPU::S_MOV_B32:
2730 return MI.getOperand(1).isReg() ?
Tom Stellard8c12fd92014-03-24 16:12:34 +00002731 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
Tom Stellard80942a12014-09-05 14:07:59 +00002732 case AMDGPU::S_ADD_I32:
2733 case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
Matt Arsenault43b8e4e2013-11-18 20:09:29 +00002734 case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
Tom Stellard80942a12014-09-05 14:07:59 +00002735 case AMDGPU::S_SUB_I32:
2736 case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
Matt Arsenault43b8e4e2013-11-18 20:09:29 +00002737 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
Matt Arsenault869cd072014-09-03 23:24:35 +00002738 case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
Matt Arsenault124384f2016-09-09 23:32:53 +00002739 case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
2740 case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
2741 case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
2742 case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
2743 case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
2744 case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
2745 case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
Tom Stellard82166022013-11-13 23:36:37 +00002746 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
2747 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
2748 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
2749 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
2750 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
2751 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
Matt Arsenault27cc9582014-04-18 01:53:18 +00002752 case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
2753 case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
Matt Arsenault78b86702014-04-18 05:19:26 +00002754 case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
2755 case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
Marek Olsak63a7b082015-03-24 13:40:21 +00002756 case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
Matt Arsenault43160e72014-06-18 17:13:57 +00002757 case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
Matt Arsenault2c335622014-04-09 07:16:16 +00002758 case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
Matt Arsenault689f3252014-06-09 16:36:31 +00002759 case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
Matt Arsenault0cb92e12014-04-11 19:25:18 +00002760 case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
2761 case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
2762 case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
2763 case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
2764 case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
2765 case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
Tom Stellardbc4497b2016-02-12 23:45:29 +00002766 case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
2767 case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
2768 case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
2769 case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
2770 case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
2771 case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
Matt Arsenault7b1dc2c2016-09-17 02:02:19 +00002772 case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
2773 case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
Marek Olsakc5368502015-01-15 18:43:01 +00002774 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
Matt Arsenault295b86e2014-06-17 17:36:27 +00002775 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
Matt Arsenault85796012014-06-17 17:36:24 +00002776 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
Marek Olsakd2af89d2015-03-04 17:33:45 +00002777 case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
Tom Stellardbc4497b2016-02-12 23:45:29 +00002778 case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
2779 case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
Tom Stellard82166022013-11-13 23:36:37 +00002780 }
2781}
2782
2783bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
2784 return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
2785}
2786
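// Return the register class required for operand OpNo of MI. If the
// instruction description does not constrain the operand (variadic
// instructions or a RegClass of -1), fall back to the class of the register
// currently in the operand.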
2787const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
2788 unsigned OpNo) const {
2789 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2790 const MCInstrDesc &Desc = get(MI.getOpcode());
2791 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
Matt Arsenault102a7042014-12-11 23:37:34 +00002792 Desc.OpInfo[OpNo].RegClass == -1) {
2793 unsigned Reg = MI.getOperand(OpNo).getReg();
2794
2795 if (TargetRegisterInfo::isVirtualRegister(Reg))
2796 return MRI.getRegClass(Reg);
Matt Arsenault11a4d672015-02-13 19:05:03 +00002797 return RI.getPhysRegClass(Reg);
Matt Arsenault102a7042014-12-11 23:37:34 +00002798 }
Tom Stellard82166022013-11-13 23:36:37 +00002799
2800 unsigned RCID = Desc.OpInfo[OpNo].RegClass;
2801 return RI.getRegClass(RCID);
2802}
2803
2804bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
2805 switch (MI.getOpcode()) {
2806 case AMDGPU::COPY:
2807 case AMDGPU::REG_SEQUENCE:
Tom Stellard4f3b04d2014-04-17 21:00:07 +00002808 case AMDGPU::PHI:
Tom Stellarda5687382014-05-15 14:41:55 +00002809 case AMDGPU::INSERT_SUBREG:
Tom Stellard82166022013-11-13 23:36:37 +00002810 return RI.hasVGPRs(getOpRegClass(MI, 0));
2811 default:
2812 return RI.hasVGPRs(getOpRegClass(MI, OpNo));
2813 }
2814}
2815
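// Legalize the operand at OpIdx by moving or copying its current value into a
// newly created virtual register and rewriting the operand to use that
// register.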
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002816void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
Tom Stellard82166022013-11-13 23:36:37 +00002817 MachineBasicBlock::iterator I = MI;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002818 MachineBasicBlock *MBB = MI.getParent();
2819 MachineOperand &MO = MI.getOperand(OpIdx);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002820 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002821 unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass;
Tom Stellard82166022013-11-13 23:36:37 +00002822 const TargetRegisterClass *RC = RI.getRegClass(RCID);
2823 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002824 if (MO.isReg())
Tom Stellard82166022013-11-13 23:36:37 +00002825 Opcode = AMDGPU::COPY;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002826 else if (RI.isSGPRClass(RC))
Matt Arsenault671a0052013-11-14 10:08:50 +00002827 Opcode = AMDGPU::S_MOV_B32;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002828
Matt Arsenault3a4d86a2013-11-18 20:09:55 +00002829 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002830 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
Tom Stellard0c93c9e2014-09-05 14:08:01 +00002831 VRC = &AMDGPU::VReg_64RegClass;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002832 else
Tom Stellard45c0b3a2015-01-07 20:59:25 +00002833 VRC = &AMDGPU::VGPR_32RegClass;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002834
Matt Arsenault3a4d86a2013-11-18 20:09:55 +00002835 unsigned Reg = MRI.createVirtualRegister(VRC);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00002836 DebugLoc DL = MBB->findDebugLoc(I);
Diana Picus116bbab2017-01-13 09:58:52 +00002837 BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO);
Tom Stellard82166022013-11-13 23:36:37 +00002838 MO.ChangeToRegister(Reg, false);
2839}
2840
Tom Stellard15834092014-03-21 15:51:57 +00002841unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
2842 MachineRegisterInfo &MRI,
2843 MachineOperand &SuperReg,
2844 const TargetRegisterClass *SuperRC,
2845 unsigned SubIdx,
2846 const TargetRegisterClass *SubRC)
2847 const {
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00002848 MachineBasicBlock *MBB = MI->getParent();
2849 DebugLoc DL = MI->getDebugLoc();
Tom Stellard15834092014-03-21 15:51:57 +00002850 unsigned SubReg = MRI.createVirtualRegister(SubRC);
2851
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00002852 if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
2853 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
2854 .addReg(SuperReg.getReg(), 0, SubIdx);
2855 return SubReg;
2856 }
2857
Tom Stellard15834092014-03-21 15:51:57 +00002858 // Just in case the super register is itself a sub-register, copy it to a new
Matt Arsenault08d84942014-06-03 23:06:13 +00002859 // value so we don't need to worry about merging its subreg index with the
2860 // SubIdx passed to this function. The register coalescer should be able to
Tom Stellard15834092014-03-21 15:51:57 +00002861 // eliminate this extra copy.
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00002862 unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
Tom Stellard15834092014-03-21 15:51:57 +00002863
Matt Arsenault7480a0e2014-11-17 21:11:37 +00002864 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
2865 .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
2866
2867 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
2868 .addReg(NewSuperReg, 0, SubIdx);
2869
Tom Stellard15834092014-03-21 15:51:57 +00002870 return SubReg;
2871}
2872
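// Like buildExtractSubReg, but if Op is an immediate, return the requested
// 32-bit half of the immediate instead of emitting any copies.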
Matt Arsenault248b7b62014-03-24 20:08:09 +00002873MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
2874 MachineBasicBlock::iterator MII,
2875 MachineRegisterInfo &MRI,
2876 MachineOperand &Op,
2877 const TargetRegisterClass *SuperRC,
2878 unsigned SubIdx,
2879 const TargetRegisterClass *SubRC) const {
2880 if (Op.isImm()) {
Matt Arsenault248b7b62014-03-24 20:08:09 +00002881 if (SubIdx == AMDGPU::sub0)
Matt Arsenaultd745c282016-09-08 17:44:36 +00002882 return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm()));
Matt Arsenault248b7b62014-03-24 20:08:09 +00002883 if (SubIdx == AMDGPU::sub1)
Matt Arsenaultd745c282016-09-08 17:44:36 +00002884 return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32));
Matt Arsenault248b7b62014-03-24 20:08:09 +00002885
2886 llvm_unreachable("Unhandled register index for immediate");
2887 }
2888
2889 unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
2890 SubIdx, SubRC);
2891 return MachineOperand::CreateReg(SubReg, false);
2892}
2893
Marek Olsakbe047802014-12-07 12:19:03 +00002894// Change the order of operands from (0, 1, 2) to (0, 2, 1)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002895void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
2896 assert(Inst.getNumExplicitOperands() == 3);
2897 MachineOperand Op1 = Inst.getOperand(1);
2898 Inst.RemoveOperand(1);
2899 Inst.addOperand(Op1);
Marek Olsakbe047802014-12-07 12:19:03 +00002900}
2901
Matt Arsenault856d1922015-12-01 19:57:17 +00002902bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
2903 const MCOperandInfo &OpInfo,
2904 const MachineOperand &MO) const {
2905 if (!MO.isReg())
2906 return false;
2907
2908 unsigned Reg = MO.getReg();
2909 const TargetRegisterClass *RC =
2910 TargetRegisterInfo::isVirtualRegister(Reg) ?
2911 MRI.getRegClass(Reg) :
2912 RI.getPhysRegClass(Reg);
2913
Nicolai Haehnle82fc9622016-01-07 17:10:29 +00002914 const SIRegisterInfo *TRI =
2915 static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
2916 RC = TRI->getSubRegClass(RC, MO.getSubReg());
2917
Matt Arsenault856d1922015-12-01 19:57:17 +00002918 // In order to be legal, the common sub-class must be equal to the
2919 // class of the current operand. For example:
2920 //
Sam Kolton1eeb11b2016-09-09 14:44:04 +00002921 // v_mov_b32 s0 ; Operand defined as vsrc_b32
2922 // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
Matt Arsenault856d1922015-12-01 19:57:17 +00002923 //
2924 // s_sendmsg 0, s0 ; Operand defined as m0reg
2925 // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
2926
2927 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
2928}
2929
2930bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
2931 const MCOperandInfo &OpInfo,
2932 const MachineOperand &MO) const {
2933 if (MO.isReg())
2934 return isLegalRegOperand(MRI, OpInfo, MO);
2935
2936 // Handle non-register types that are treated like immediates.
2937 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
2938 return true;
2939}
2940
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002941bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
Tom Stellard0e975cf2014-08-01 00:32:35 +00002942 const MachineOperand *MO) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002943 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2944 const MCInstrDesc &InstDesc = MI.getDesc();
Tom Stellard0e975cf2014-08-01 00:32:35 +00002945 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
2946 const TargetRegisterClass *DefinedRC =
2947 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
2948 if (!MO)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002949 MO = &MI.getOperand(OpIdx);
Tom Stellard0e975cf2014-08-01 00:32:35 +00002950
Matt Arsenault4bd72362016-12-10 00:39:12 +00002951 if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
Matt Arsenaultfcb345f2016-02-11 06:15:39 +00002952
2953 RegSubRegPair SGPRUsed;
2954 if (MO->isReg())
2955 SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
2956
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002957 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002958 if (i == OpIdx)
2959 continue;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002960 const MachineOperand &Op = MI.getOperand(i);
Matt Arsenaultffc82752016-07-05 17:09:01 +00002961 if (Op.isReg()) {
2962 if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
Matt Arsenault4bd72362016-12-10 00:39:12 +00002963 usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
Matt Arsenaultffc82752016-07-05 17:09:01 +00002964 return false;
2965 }
2966 } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002967 return false;
2968 }
2969 }
2970 }
2971
Tom Stellard0e975cf2014-08-01 00:32:35 +00002972 if (MO->isReg()) {
2973 assert(DefinedRC);
Matt Arsenault856d1922015-12-01 19:57:17 +00002974 return isLegalRegOperand(MRI, OpInfo, *MO);
Tom Stellard0e975cf2014-08-01 00:32:35 +00002975 }
2976
Tom Stellard0e975cf2014-08-01 00:32:35 +00002977 // Handle non-register types that are treated like immediates.
Tom Stellardfb77f002015-01-13 22:59:41 +00002978 assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
Tom Stellard0e975cf2014-08-01 00:32:35 +00002979
Matt Arsenault4364fef2014-09-23 18:30:57 +00002980 if (!DefinedRC) {
2981 // This operand expects an immediate.
Tom Stellard0e975cf2014-08-01 00:32:35 +00002982 return true;
Matt Arsenault4364fef2014-09-23 18:30:57 +00002983 }
Tom Stellard0e975cf2014-08-01 00:32:35 +00002984
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002985 return isImmOperandLegal(MI, OpIdx, *MO);
Tom Stellard0e975cf2014-08-01 00:32:35 +00002986}
2987
Matt Arsenault856d1922015-12-01 19:57:17 +00002988void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002989 MachineInstr &MI) const {
2990 unsigned Opc = MI.getOpcode();
Matt Arsenault856d1922015-12-01 19:57:17 +00002991 const MCInstrDesc &InstrDesc = get(Opc);
2992
2993 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002994 MachineOperand &Src1 = MI.getOperand(Src1Idx);
Matt Arsenault856d1922015-12-01 19:57:17 +00002995
2996 // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
2997 // we need to only have one constant bus use.
2998 //
2999 // Note we do not need to worry about literal constants here. They are
3000 // disabled for the operand type for instructions because they will always
3001 // violate the one constant bus use rule.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003002 bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
Matt Arsenault856d1922015-12-01 19:57:17 +00003003 if (HasImplicitSGPR) {
3004 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003005 MachineOperand &Src0 = MI.getOperand(Src0Idx);
Matt Arsenault856d1922015-12-01 19:57:17 +00003006
3007 if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
3008 legalizeOpWithMove(MI, Src0Idx);
3009 }
3010

3011 // VOP2 instructions accept all operand types for src0, so we don't need to
3012 // check its legality. If src1 is already legal, we don't need to do anything.
3013 if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
3014 return;
3015
Nicolai Haehnle5dea6452017-04-24 17:17:36 +00003016 // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for
3017 // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane
3018 // select is uniform.
3019 if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
3020 RI.isVGPR(MRI, Src1.getReg())) {
3021 unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
3022 const DebugLoc &DL = MI.getDebugLoc();
3023 BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
3024 .add(Src1);
3025 Src1.ChangeToRegister(Reg, false);
3026 return;
3027 }
3028
Matt Arsenault856d1922015-12-01 19:57:17 +00003029 // We do not use commuteInstruction here because it is too aggressive and will
3030 // commute if it is possible. We only want to commute here if it improves
3031 // legality. This can be called a fairly large number of times so don't waste
3032 // compile time pointlessly swapping and checking legality again.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003033 if (HasImplicitSGPR || !MI.isCommutable()) {
Matt Arsenault856d1922015-12-01 19:57:17 +00003034 legalizeOpWithMove(MI, Src1Idx);
3035 return;
3036 }
3037
3038 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003039 MachineOperand &Src0 = MI.getOperand(Src0Idx);
Matt Arsenault856d1922015-12-01 19:57:17 +00003040
3041 // If src0 can be used as src1, commuting will make the operands legal.
3042 // Otherwise we have to give up and insert a move.
3043 //
3044 // TODO: Other immediate-like operand kinds could be commuted if there was a
3045 // MachineOperand::ChangeTo* for them.
3046 if ((!Src1.isImm() && !Src1.isReg()) ||
3047 !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
3048 legalizeOpWithMove(MI, Src1Idx);
3049 return;
3050 }
3051
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003052 int CommutedOpc = commuteOpcode(MI);
Matt Arsenault856d1922015-12-01 19:57:17 +00003053 if (CommutedOpc == -1) {
3054 legalizeOpWithMove(MI, Src1Idx);
3055 return;
3056 }
3057
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003058 MI.setDesc(get(CommutedOpc));
Matt Arsenault856d1922015-12-01 19:57:17 +00003059
3060 unsigned Src0Reg = Src0.getReg();
3061 unsigned Src0SubReg = Src0.getSubReg();
3062 bool Src0Kill = Src0.isKill();
3063
3064 if (Src1.isImm())
3065 Src0.ChangeToImmediate(Src1.getImm());
3066 else if (Src1.isReg()) {
3067 Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
3068 Src0.setSubReg(Src1.getSubReg());
3069 } else
3070 llvm_unreachable("Should only have register or immediate operands");
3071
3072 Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
3073 Src1.setSubReg(Src0SubReg);
3074}
3075
Matt Arsenault6005fcb2015-10-21 21:51:02 +00003076// Legalize VOP3 operands. Because all operand types are supported for any
3077// operand, and since literal constants are not allowed and should never be
3078// seen, we only need to worry about inserting copies if we use multiple SGPR
3079// operands.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003080void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
3081 MachineInstr &MI) const {
3082 unsigned Opc = MI.getOpcode();
Matt Arsenault6005fcb2015-10-21 21:51:02 +00003083
3084 int VOP3Idx[3] = {
3085 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
3086 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
3087 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
3088 };
3089
3090 // Find the one SGPR operand we are allowed to use.
3091 unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
3092
3093 for (unsigned i = 0; i < 3; ++i) {
3094 int Idx = VOP3Idx[i];
3095 if (Idx == -1)
3096 break;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003097 MachineOperand &MO = MI.getOperand(Idx);
Matt Arsenault6005fcb2015-10-21 21:51:02 +00003098
3099 // We should never see a VOP3 instruction with an illegal immediate operand.
3100 if (!MO.isReg())
3101 continue;
3102
3103 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
3104 continue; // VGPRs are legal
3105
3106 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
3107 SGPRReg = MO.getReg();
3108 // We can use one SGPR in each VOP3 instruction.
3109 continue;
3110 }
3111
3112 // If we make it this far, then the operand is not legal and we must
3113 // legalize it.
3114 legalizeOpWithMove(MI, Idx);
3115 }
3116}
3117
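// Copy a VGPR value into a new SGPR of the same width by applying
// V_READFIRSTLANE_B32 to each 32-bit sub-register and recombining the results
// with a REG_SEQUENCE. This is only meaningful when the VGPR value is known to
// be uniform across the wave (see legalizeOperandsSMRD below).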
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003118unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
3119 MachineRegisterInfo &MRI) const {
Tom Stellard1397d492016-02-11 21:45:07 +00003120 const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
3121 const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
3122 unsigned DstReg = MRI.createVirtualRegister(SRC);
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00003123 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
Tom Stellard1397d492016-02-11 21:45:07 +00003124
3125 SmallVector<unsigned, 8> SRegs;
3126 for (unsigned i = 0; i < SubRegs; ++i) {
3127 unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003128 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
Tom Stellard1397d492016-02-11 21:45:07 +00003129 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003130 .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
Tom Stellard1397d492016-02-11 21:45:07 +00003131 SRegs.push_back(SGPR);
3132 }
3133
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003134 MachineInstrBuilder MIB =
3135 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
3136 get(AMDGPU::REG_SEQUENCE), DstReg);
Tom Stellard1397d492016-02-11 21:45:07 +00003137 for (unsigned i = 0; i < SubRegs; ++i) {
3138 MIB.addReg(SRegs[i]);
3139 MIB.addImm(RI.getSubRegFromChannel(i));
3140 }
3141 return DstReg;
3142}
3143
Tom Stellard467b5b92016-02-20 00:37:25 +00003144void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003145 MachineInstr &MI) const {
Tom Stellard467b5b92016-02-20 00:37:25 +00003146
3147 // If the pointer is stored in VGPRs, then we need to move it to
3148 // SGPRs using v_readfirstlane. This is safe because we only select
3149 // loads with uniform pointers to SMRD instructions, so we know the
3150 // pointer value is uniform.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003151 MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
Tom Stellard467b5b92016-02-20 00:37:25 +00003152 if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
3153 unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
3154 SBase->setReg(SGPR);
3155 }
3156}
3157
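// Insert a COPY of Op into a new virtual register of class DstRC at the given
// point and rewrite Op to use it. If Op was defined by a move-immediate, try
// to fold the immediate into the new copy.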
Tom Stellard0d162b12016-11-16 18:42:17 +00003158void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
3159 MachineBasicBlock::iterator I,
3160 const TargetRegisterClass *DstRC,
3161 MachineOperand &Op,
3162 MachineRegisterInfo &MRI,
3163 const DebugLoc &DL) const {
Tom Stellard0d162b12016-11-16 18:42:17 +00003164 unsigned OpReg = Op.getReg();
3165 unsigned OpSubReg = Op.getSubReg();
3166
3167 const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
3168 RI.getRegClassForReg(MRI, OpReg), OpSubReg);
3169
3170 // Check if operand is already the correct register class.
3171 if (DstRC == OpRC)
3172 return;
3173
3174 unsigned DstReg = MRI.createVirtualRegister(DstRC);
Diana Picus116bbab2017-01-13 09:58:52 +00003175 MachineInstr *Copy =
3176 BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
Tom Stellard0d162b12016-11-16 18:42:17 +00003177
3178 Op.setReg(DstReg);
3179 Op.setSubReg(0);
3180
3181 MachineInstr *Def = MRI.getVRegDef(OpReg);
3182 if (!Def)
3183 return;
3184
3185 // Try to eliminate the copy if it is copying an immediate value.
3186 if (Def->isMoveImmediate())
3187 FoldImmediate(*Copy, *Def, OpReg, &MRI);
3188}
3189
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003190void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
Nicolai Haehnlece2b5892016-11-18 11:55:52 +00003191 MachineFunction &MF = *MI.getParent()->getParent();
3192 MachineRegisterInfo &MRI = MF.getRegInfo();
Tom Stellard82166022013-11-13 23:36:37 +00003193
3194 // Legalize VOP2
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003195 if (isVOP2(MI) || isVOPC(MI)) {
Matt Arsenault856d1922015-12-01 19:57:17 +00003196 legalizeOperandsVOP2(MRI, MI);
Tom Stellard0e975cf2014-08-01 00:32:35 +00003197 return;
Tom Stellard82166022013-11-13 23:36:37 +00003198 }
3199
3200 // Legalize VOP3
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003201 if (isVOP3(MI)) {
Matt Arsenault6005fcb2015-10-21 21:51:02 +00003202 legalizeOperandsVOP3(MRI, MI);
Matt Arsenaulte068f9a2015-09-24 07:51:28 +00003203 return;
Tom Stellard82166022013-11-13 23:36:37 +00003204 }
3205
Tom Stellard467b5b92016-02-20 00:37:25 +00003206 // Legalize SMRD
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003207 if (isSMRD(MI)) {
Tom Stellard467b5b92016-02-20 00:37:25 +00003208 legalizeOperandsSMRD(MRI, MI);
3209 return;
3210 }
3211
Tom Stellard4f3b04d2014-04-17 21:00:07 +00003212 // Legalize REG_SEQUENCE and PHI
Tom Stellard82166022013-11-13 23:36:37 +00003213 // The register class of the operands must be the same type as the register
3214 // class of the output.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003215 if (MI.getOpcode() == AMDGPU::PHI) {
Craig Topper062a2ba2014-04-25 05:30:21 +00003216 const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003217 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
3218 if (!MI.getOperand(i).isReg() ||
3219 !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
Tom Stellard82166022013-11-13 23:36:37 +00003220 continue;
3221 const TargetRegisterClass *OpRC =
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003222 MRI.getRegClass(MI.getOperand(i).getReg());
Tom Stellard82166022013-11-13 23:36:37 +00003223 if (RI.hasVGPRs(OpRC)) {
3224 VRC = OpRC;
3225 } else {
3226 SRC = OpRC;
3227 }
3228 }
3229
3230 // If any of the operands are VGPR registers, then they all must be VGPRs;
3231 // otherwise we will create illegal VGPR->SGPR copies when legalizing
3232 // them.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003233 if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
Tom Stellard82166022013-11-13 23:36:37 +00003234 if (!VRC) {
3235 assert(SRC);
3236 VRC = RI.getEquivalentVGPRClass(SRC);
3237 }
3238 RC = VRC;
3239 } else {
3240 RC = SRC;
3241 }
3242
3243 // Update all the operands so they have the same type.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003244 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3245 MachineOperand &Op = MI.getOperand(I);
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003246 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
Tom Stellard82166022013-11-13 23:36:37 +00003247 continue;
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003248
3249 // MI is a PHI instruction.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003250 MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003251 MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
3252
Tom Stellard0d162b12016-11-16 18:42:17 +00003253 // Avoid creating no-op copies with the same src and dst reg class. These
3254 // confuse some of the machine passes.
3255 legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc());
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003256 }
3257 }
3258
3259 // REG_SEQUENCE doesn't really require operand legalization, but if one has a
3260 // VGPR dest type and SGPR sources, insert copies so all operands are
3261 // VGPRs. This seems to help operand folding / the register coalescer.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003262 if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
3263 MachineBasicBlock *MBB = MI.getParent();
3264 const TargetRegisterClass *DstRC = getOpRegClass(MI, 0);
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003265 if (RI.hasVGPRs(DstRC)) {
3266 // Update all the operands so they are VGPR register classes. These may
3267 // not be the same register class because REG_SEQUENCE supports mixing
3268 // subregister index types e.g. sub0_sub1 + sub2 + sub3
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003269 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3270 MachineOperand &Op = MI.getOperand(I);
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003271 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
3272 continue;
3273
3274 const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
3275 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
3276 if (VRC == OpRC)
3277 continue;
3278
Tom Stellard0d162b12016-11-16 18:42:17 +00003279 legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc());
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003280 Op.setIsKill();
Tom Stellard4f3b04d2014-04-17 21:00:07 +00003281 }
Tom Stellard82166022013-11-13 23:36:37 +00003282 }
Matt Arsenaulte068f9a2015-09-24 07:51:28 +00003283
3284 return;
Tom Stellard82166022013-11-13 23:36:37 +00003285 }
Tom Stellard15834092014-03-21 15:51:57 +00003286
Tom Stellarda5687382014-05-15 14:41:55 +00003287 // Legalize INSERT_SUBREG
3288 // src0 must have the same register class as dst
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003289 if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
3290 unsigned Dst = MI.getOperand(0).getReg();
3291 unsigned Src0 = MI.getOperand(1).getReg();
Tom Stellarda5687382014-05-15 14:41:55 +00003292 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
3293 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
3294 if (DstRC != Src0RC) {
Tom Stellard0d162b12016-11-16 18:42:17 +00003295 MachineBasicBlock *MBB = MI.getParent();
3296 MachineOperand &Op = MI.getOperand(1);
3297 legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc());
Tom Stellarda5687382014-05-15 14:41:55 +00003298 }
3299 return;
3300 }
3301
Nicolai Haehnlece2b5892016-11-18 11:55:52 +00003302 // Legalize MIMG and MUBUF/MTBUF for shaders.
3303 //
3304 // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
3305 // scratch memory access. In both cases, the legalization never involves
3306 // conversion to the addr64 form.
3307 if (isMIMG(MI) ||
3308 (AMDGPU::isShader(MF.getFunction()->getCallingConv()) &&
3309 (isMUBUF(MI) || isMTBUF(MI)))) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003310 MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
Tom Stellard1397d492016-02-11 21:45:07 +00003311 if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
3312 unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
3313 SRsrc->setReg(SGPR);
3314 }
3315
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003316 MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
Tom Stellard1397d492016-02-11 21:45:07 +00003317 if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
3318 unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
3319 SSamp->setReg(SGPR);
3320 }
3321 return;
3322 }
3323
Nicolai Haehnlece2b5892016-11-18 11:55:52 +00003324 // Legalize MUBUF* instructions by converting to addr64 form.
Tom Stellard15834092014-03-21 15:51:57 +00003325 // FIXME: If we start using the non-addr64 instructions for compute, we
Nicolai Haehnlece2b5892016-11-18 11:55:52 +00003326 // may need to legalize them as above. This especially applies to the
3327 // buffer_load_format_* variants and variants with idxen (or bothen).
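// Roughly, for the _OFFSET case (operand lists abridged):
//   BUFFER_LOAD_DWORD_OFFSET %vdata, %srsrc, %soffset, offset
// becomes
//   BUFFER_LOAD_DWORD_ADDR64 %vdata, %newvaddr, %newsrsrc, %soffset, offset
// where %newvaddr holds the 64-bit pointer extracted from the old descriptor
// and %newsrsrc is a descriptor with a zero base address.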
Tom Stellard155bbb72014-08-11 22:18:17 +00003328 int SRsrcIdx =
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003329 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
Tom Stellard155bbb72014-08-11 22:18:17 +00003330 if (SRsrcIdx != -1) {
3331 // We have an MUBUF instruction
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003332 MachineOperand *SRsrc = &MI.getOperand(SRsrcIdx);
3333 unsigned SRsrcRC = get(MI.getOpcode()).OpInfo[SRsrcIdx].RegClass;
Tom Stellard155bbb72014-08-11 22:18:17 +00003334 if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
3335 RI.getRegClass(SRsrcRC))) {
3336 // The operands are legal.
3337 // FIXME: We may need to legalize operands besides srsrc.
3338 return;
3339 }
Tom Stellard15834092014-03-21 15:51:57 +00003340
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003341 MachineBasicBlock &MBB = *MI.getParent();
Matt Arsenaultef67d762015-09-09 17:03:29 +00003342
Eric Christopher572e03a2015-06-19 01:53:21 +00003343 // Extract the ptr from the resource descriptor.
Matt Arsenaultef67d762015-09-09 17:03:29 +00003344 unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
3345 &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
Tom Stellard15834092014-03-21 15:51:57 +00003346
Tom Stellard155bbb72014-08-11 22:18:17 +00003347 // Create an empty resource descriptor
3348 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
3349 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3350 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3351 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
Tom Stellard794c8c02014-12-02 17:05:41 +00003352 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
Tom Stellard15834092014-03-21 15:51:57 +00003353
Tom Stellard155bbb72014-08-11 22:18:17 +00003354 // Zero64 = 0
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003355 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B64), Zero64)
3356 .addImm(0);
Tom Stellard15834092014-03-21 15:51:57 +00003357
Tom Stellard155bbb72014-08-11 22:18:17 +00003358 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003359 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
3360 .addImm(RsrcDataFormat & 0xFFFFFFFF);
Tom Stellard15834092014-03-21 15:51:57 +00003361
Tom Stellard155bbb72014-08-11 22:18:17 +00003362 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003363 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
3364 .addImm(RsrcDataFormat >> 32);
Tom Stellard15834092014-03-21 15:51:57 +00003365
Tom Stellard155bbb72014-08-11 22:18:17 +00003366 // NewSRsrc = {Zero64, SRsrcFormat}
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003367 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
3368 .addReg(Zero64)
3369 .addImm(AMDGPU::sub0_sub1)
3370 .addReg(SRsrcFormatLo)
3371 .addImm(AMDGPU::sub2)
3372 .addReg(SRsrcFormatHi)
3373 .addImm(AMDGPU::sub3);
Tom Stellard155bbb72014-08-11 22:18:17 +00003374
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003375 MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
Tom Stellard155bbb72014-08-11 22:18:17 +00003376 unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
Tom Stellard155bbb72014-08-11 22:18:17 +00003377 if (VAddr) {
3378 // This is already an ADDR64 instruction so we need to add the pointer
3379 // extracted from the resource descriptor to the current value of VAddr.
Matt Arsenaultef67d762015-09-09 17:03:29 +00003380 unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3381 unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Tom Stellard155bbb72014-08-11 22:18:17 +00003382
Matt Arsenaultef67d762015-09-09 17:03:29 +00003383 // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003384 DebugLoc DL = MI.getDebugLoc();
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00003385 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
Matt Arsenaultef67d762015-09-09 17:03:29 +00003386 .addReg(SRsrcPtr, 0, AMDGPU::sub0)
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00003387 .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
Tom Stellard15834092014-03-21 15:51:57 +00003388
Matt Arsenaultef67d762015-09-09 17:03:29 +00003389 // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00003390 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
Matt Arsenaultef67d762015-09-09 17:03:29 +00003391 .addReg(SRsrcPtr, 0, AMDGPU::sub1)
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00003392 .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
Tom Stellard15834092014-03-21 15:51:57 +00003393
Matt Arsenaultef67d762015-09-09 17:03:29 +00003394 // NewVaddr = {NewVaddrHi, NewVaddrLo}
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003395 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
3396 .addReg(NewVAddrLo)
3397 .addImm(AMDGPU::sub0)
3398 .addReg(NewVAddrHi)
3399 .addImm(AMDGPU::sub1);
Tom Stellard155bbb72014-08-11 22:18:17 +00003400 } else {
3401 // This instruction is the _OFFSET variant, so we need to convert it to
3402 // ADDR64.
Matt Arsenault43e92fe2016-06-24 06:30:11 +00003403 assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration()
3404 < SISubtarget::VOLCANIC_ISLANDS &&
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003405 "FIXME: Need to emit flat atomics here");
3406
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003407 MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
3408 MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
3409 MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
3410 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode());
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003411
3412 // Atomics with return have an additional tied operand and are
3413 // missing some of the special bits.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003414 MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in);
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003415 MachineInstr *Addr64;
3416
3417 if (!VDataIn) {
3418 // Regular buffer load / store.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003419 MachineInstrBuilder MIB =
3420 BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
Diana Picus116bbab2017-01-13 09:58:52 +00003421 .add(*VData)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003422 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
3423 // This will be replaced later
3424 // with the new value of vaddr.
Diana Picus116bbab2017-01-13 09:58:52 +00003425 .add(*SRsrc)
3426 .add(*SOffset)
3427 .add(*Offset);
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003428
3429 // Atomics do not have this operand.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003430 if (const MachineOperand *GLC =
3431 getNamedOperand(MI, AMDGPU::OpName::glc)) {
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003432 MIB.addImm(GLC->getImm());
3433 }
3434
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003435 MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003436
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003437 if (const MachineOperand *TFE =
3438 getNamedOperand(MI, AMDGPU::OpName::tfe)) {
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003439 MIB.addImm(TFE->getImm());
3440 }
3441
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003442 MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003443 Addr64 = MIB;
3444 } else {
3445 // Atomics with return.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003446 Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
Diana Picus116bbab2017-01-13 09:58:52 +00003447 .add(*VData)
3448 .add(*VDataIn)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003449 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
3450 // This will be replaced later
3451 // with the new value of vaddr.
Diana Picus116bbab2017-01-13 09:58:52 +00003452 .add(*SRsrc)
3453 .add(*SOffset)
3454 .add(*Offset)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003455 .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
3456 .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
Matt Arsenaulta40450c2015-11-05 02:46:56 +00003457 }
Tom Stellard15834092014-03-21 15:51:57 +00003458
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003459 MI.removeFromParent();
Tom Stellard15834092014-03-21 15:51:57 +00003460
Matt Arsenaultef67d762015-09-09 17:03:29 +00003461 // NewVaddr = {NewVaddrHi, NewVaddrLo}
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003462 BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
3463 NewVAddr)
3464 .addReg(SRsrcPtr, 0, AMDGPU::sub0)
3465 .addImm(AMDGPU::sub0)
3466 .addReg(SRsrcPtr, 0, AMDGPU::sub1)
3467 .addImm(AMDGPU::sub1);
Matt Arsenaultef67d762015-09-09 17:03:29 +00003468
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003469 VAddr = getNamedOperand(*Addr64, AMDGPU::OpName::vaddr);
3470 SRsrc = getNamedOperand(*Addr64, AMDGPU::OpName::srsrc);
Tom Stellard15834092014-03-21 15:51:57 +00003471 }
Tom Stellard155bbb72014-08-11 22:18:17 +00003472
Tom Stellard155bbb72014-08-11 22:18:17 +00003473 // Update the instruction to use NewVaddr
3474 VAddr->setReg(NewVAddr);
3475 // Update the instruction to use NewSRsrc
3476 SRsrc->setReg(NewSRsrc);
Tom Stellard15834092014-03-21 15:51:57 +00003477 }
Tom Stellard82166022013-11-13 23:36:37 +00003478}
3479
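// Move TopInst from the SALU to the VALU: rewrite it to its VALU equivalent
// (splitting or lowering it when there is no direct equivalent), legalize its
// operands, and queue any dependent instructions that must also be moved.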
3480void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
Alfred Huang5b270722017-07-14 17:56:55 +00003481 SetVectorType Worklist;
3482 Worklist.insert(&TopInst);
Tom Stellard82166022013-11-13 23:36:37 +00003483
3484 while (!Worklist.empty()) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003485 MachineInstr &Inst = *Worklist.pop_back_val();
3486 MachineBasicBlock *MBB = Inst.getParent();
Tom Stellarde0387202014-03-21 15:51:54 +00003487 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
3488
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003489 unsigned Opcode = Inst.getOpcode();
3490 unsigned NewOpcode = getVALUOp(Inst);
Matt Arsenault27cc9582014-04-18 01:53:18 +00003491
Tom Stellarde0387202014-03-21 15:51:54 +00003492 // Handle some special cases
Matt Arsenault27cc9582014-04-18 01:53:18 +00003493 switch (Opcode) {
Tom Stellard0c354f22014-04-30 15:31:29 +00003494 default:
Tom Stellard0c354f22014-04-30 15:31:29 +00003495 break;
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003496 case AMDGPU::S_AND_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00003497 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003498 Inst.eraseFromParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003499 continue;
3500
3501 case AMDGPU::S_OR_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00003502 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003503 Inst.eraseFromParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003504 continue;
3505
3506 case AMDGPU::S_XOR_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00003507 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003508 Inst.eraseFromParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003509 continue;
3510
3511 case AMDGPU::S_NOT_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00003512 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003513 Inst.eraseFromParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003514 continue;
3515
Matt Arsenault8333e432014-06-10 19:18:24 +00003516 case AMDGPU::S_BCNT1_I32_B64:
3517 splitScalar64BitBCNT(Worklist, Inst);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003518 Inst.eraseFromParent();
Matt Arsenault8333e432014-06-10 19:18:24 +00003519 continue;
3520
Eugene Zelenko59e12822017-08-08 00:47:13 +00003521 case AMDGPU::S_BFE_I64:
Matt Arsenault94812212014-11-14 18:18:16 +00003522 splitScalar64BitBFE(Worklist, Inst);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003523 Inst.eraseFromParent();
Matt Arsenault94812212014-11-14 18:18:16 +00003524 continue;
Matt Arsenault94812212014-11-14 18:18:16 +00003525
Marek Olsakbe047802014-12-07 12:19:03 +00003526 case AMDGPU::S_LSHL_B32:
Matt Arsenault43e92fe2016-06-24 06:30:11 +00003527 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
Marek Olsakbe047802014-12-07 12:19:03 +00003528 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
3529 swapOperands(Inst);
3530 }
3531 break;
3532 case AMDGPU::S_ASHR_I32:
Matt Arsenault43e92fe2016-06-24 06:30:11 +00003533 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
Marek Olsakbe047802014-12-07 12:19:03 +00003534 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
3535 swapOperands(Inst);
3536 }
3537 break;
3538 case AMDGPU::S_LSHR_B32:
Matt Arsenault43e92fe2016-06-24 06:30:11 +00003539 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
Marek Olsakbe047802014-12-07 12:19:03 +00003540 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
3541 swapOperands(Inst);
3542 }
3543 break;
Marek Olsak707a6d02015-02-03 21:53:01 +00003544 case AMDGPU::S_LSHL_B64:
Matt Arsenault43e92fe2016-06-24 06:30:11 +00003545 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
Marek Olsak707a6d02015-02-03 21:53:01 +00003546 NewOpcode = AMDGPU::V_LSHLREV_B64;
3547 swapOperands(Inst);
3548 }
3549 break;
3550 case AMDGPU::S_ASHR_I64:
Matt Arsenault43e92fe2016-06-24 06:30:11 +00003551 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
Marek Olsak707a6d02015-02-03 21:53:01 +00003552 NewOpcode = AMDGPU::V_ASHRREV_I64;
3553 swapOperands(Inst);
3554 }
3555 break;
3556 case AMDGPU::S_LSHR_B64:
Matt Arsenault43e92fe2016-06-24 06:30:11 +00003557 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
Marek Olsak707a6d02015-02-03 21:53:01 +00003558 NewOpcode = AMDGPU::V_LSHRREV_B64;
3559 swapOperands(Inst);
3560 }
3561 break;
Marek Olsakbe047802014-12-07 12:19:03 +00003562
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00003563 case AMDGPU::S_ABS_I32:
3564 lowerScalarAbs(Worklist, Inst);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003565 Inst.eraseFromParent();
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00003566 continue;
3567
Tom Stellardbc4497b2016-02-12 23:45:29 +00003568 case AMDGPU::S_CBRANCH_SCC0:
3569 case AMDGPU::S_CBRANCH_SCC1:
3570 // Clear unused bits of vcc
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003571 BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
3572 AMDGPU::VCC)
3573 .addReg(AMDGPU::EXEC)
3574 .addReg(AMDGPU::VCC);
Tom Stellardbc4497b2016-02-12 23:45:29 +00003575 break;
3576
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003577 case AMDGPU::S_BFE_U64:
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003578 case AMDGPU::S_BFM_B64:
3579 llvm_unreachable("Moving this op to VALU not implemented");
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003580
3581 case AMDGPU::S_PACK_LL_B32_B16:
3582 case AMDGPU::S_PACK_LH_B32_B16:
Eugene Zelenko59e12822017-08-08 00:47:13 +00003583 case AMDGPU::S_PACK_HH_B32_B16:
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003584 movePackToVALU(Worklist, MRI, Inst);
3585 Inst.eraseFromParent();
3586 continue;
3587 }
Tom Stellarde0387202014-03-21 15:51:54 +00003588
Tom Stellard15834092014-03-21 15:51:57 +00003589 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
3590 // We cannot move this instruction to the VALU, so we should try to
3591 // legalize its operands instead.
3592 legalizeOperands(Inst);
Tom Stellard82166022013-11-13 23:36:37 +00003593 continue;
Tom Stellard15834092014-03-21 15:51:57 +00003594 }
Tom Stellard82166022013-11-13 23:36:37 +00003595
Tom Stellard82166022013-11-13 23:36:37 +00003596 // Use the new VALU Opcode.
3597 const MCInstrDesc &NewDesc = get(NewOpcode);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003598 Inst.setDesc(NewDesc);
Tom Stellard82166022013-11-13 23:36:37 +00003599
Matt Arsenaultf0b1e3a2013-11-18 20:09:21 +00003600 // Remove any references to SCC. Vector instructions can't read from it, and
3601 // we're just about to add the implicit use / defs of VCC, so we don't want
3602 // both.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003603 for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) {
3604 MachineOperand &Op = Inst.getOperand(i);
Tom Stellardbc4497b2016-02-12 23:45:29 +00003605 if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003606 Inst.RemoveOperand(i);
Tom Stellardbc4497b2016-02-12 23:45:29 +00003607 addSCCDefUsersToVALUWorklist(Inst, Worklist);
3608 }
Matt Arsenaultf0b1e3a2013-11-18 20:09:21 +00003609 }
3610
Matt Arsenault27cc9582014-04-18 01:53:18 +00003611 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
3612 // We are converting these to a BFE, so we need to add the missing
3613 // operands for the size and offset.
3614 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003615 Inst.addOperand(MachineOperand::CreateImm(0));
3616 Inst.addOperand(MachineOperand::CreateImm(Size));
Matt Arsenault27cc9582014-04-18 01:53:18 +00003617
Matt Arsenaultb5b51102014-06-10 19:18:21 +00003618 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
3619 // The VALU version adds the second operand to the result, so insert an
3620 // extra 0 operand.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003621 Inst.addOperand(MachineOperand::CreateImm(0));
Tom Stellard82166022013-11-13 23:36:37 +00003622 }
3623
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003624 Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent());
Tom Stellard82166022013-11-13 23:36:37 +00003625
Matt Arsenault78b86702014-04-18 05:19:26 +00003626 if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003627 const MachineOperand &OffsetWidthOp = Inst.getOperand(2);
Matt Arsenault78b86702014-04-18 05:19:26 +00003628 // If we need to move this to VGPRs, we need to unpack the second operand
3629 // back into the 2 separate ones for bit offset and width.
3630 assert(OffsetWidthOp.isImm() &&
3631 "Scalar BFE is only implemented for constant width and offset");
3632 uint32_t Imm = OffsetWidthOp.getImm();
3633
3634 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
3635 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003636 Inst.RemoveOperand(2); // Remove old immediate.
3637 Inst.addOperand(MachineOperand::CreateImm(Offset));
3638 Inst.addOperand(MachineOperand::CreateImm(BitWidth));
Matt Arsenault78b86702014-04-18 05:19:26 +00003639 }
3640
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003641 bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
Tom Stellardbc4497b2016-02-12 23:45:29 +00003642 unsigned NewDstReg = AMDGPU::NoRegister;
3643 if (HasDst) {
Matt Arsenault21a43822017-04-06 21:09:53 +00003644 unsigned DstReg = Inst.getOperand(0).getReg();
3645 if (TargetRegisterInfo::isPhysicalRegister(DstReg))
3646 continue;
3647
Tom Stellardbc4497b2016-02-12 23:45:29 +00003648 // Update the destination register class.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003649 const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
Tom Stellardbc4497b2016-02-12 23:45:29 +00003650 if (!NewDstRC)
3651 continue;
Tom Stellard82166022013-11-13 23:36:37 +00003652
Tom Stellard0d162b12016-11-16 18:42:17 +00003653 if (Inst.isCopy() &&
3654 TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
3655 NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
3656 // Instead of creating a copy where src and dst are the same register
3657 // class, we just replace all uses of dst with src. These kinds of
3658 // copies interfere with the heuristics MachineSink uses to decide
3659 // whether or not to split a critical edge, since the pass assumes
3660 // that copies will end up as machine instructions and not be
3661 // eliminated.
3662 addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
3663 MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
3664 MRI.clearKillFlags(Inst.getOperand(1).getReg());
3665 Inst.getOperand(0).setReg(DstReg);
3666 continue;
3667 }
3668
Tom Stellardbc4497b2016-02-12 23:45:29 +00003669 NewDstReg = MRI.createVirtualRegister(NewDstRC);
3670 MRI.replaceRegWith(DstReg, NewDstReg);
3671 }
Tom Stellard82166022013-11-13 23:36:37 +00003672
Tom Stellarde1a24452014-04-17 21:00:01 +00003673 // Legalize the operands
3674 legalizeOperands(Inst);
3675
Tom Stellardbc4497b2016-02-12 23:45:29 +00003676 if (HasDst)
3677 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
Tom Stellard82166022013-11-13 23:36:37 +00003678 }
3679}
3680
Alfred Huang5b270722017-07-14 17:56:55 +00003681void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003682 MachineInstr &Inst) const {
3683 MachineBasicBlock &MBB = *Inst.getParent();
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00003684 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3685 MachineBasicBlock::iterator MII = Inst;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003686 DebugLoc DL = Inst.getDebugLoc();
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00003687
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003688 MachineOperand &Dest = Inst.getOperand(0);
3689 MachineOperand &Src = Inst.getOperand(1);
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00003690 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3691 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3692
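// Lower S_ABS_I32 as max(x, 0 - x): V_SUB computes the negation into TmpReg,
// then V_MAX_I32 selects the non-negative value.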
3693 BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
3694 .addImm(0)
3695 .addReg(Src.getReg());
3696
3697 BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
3698 .addReg(Src.getReg())
3699 .addReg(TmpReg);
3700
3701 MRI.replaceRegWith(Dest.getReg(), ResultReg);
3702 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
3703}
3704
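// Split a 64-bit scalar unary operation (e.g. S_NOT_B64) into two 32-bit VALU
// operations on the sub0/sub1 halves and recombine the results with a
// REG_SEQUENCE.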
Matt Arsenault689f3252014-06-09 16:36:31 +00003705void SIInstrInfo::splitScalar64BitUnaryOp(
Alfred Huang5b270722017-07-14 17:56:55 +00003706 SetVectorType &Worklist, MachineInstr &Inst,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003707 unsigned Opcode) const {
3708 MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenault689f3252014-06-09 16:36:31 +00003709 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3710
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003711 MachineOperand &Dest = Inst.getOperand(0);
3712 MachineOperand &Src0 = Inst.getOperand(1);
3713 DebugLoc DL = Inst.getDebugLoc();
Matt Arsenault689f3252014-06-09 16:36:31 +00003714
3715 MachineBasicBlock::iterator MII = Inst;
3716
3717 const MCInstrDesc &InstDesc = get(Opcode);
3718 const TargetRegisterClass *Src0RC = Src0.isReg() ?
3719 MRI.getRegClass(Src0.getReg()) :
3720 &AMDGPU::SGPR_32RegClass;
3721
3722 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
3723
3724 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3725 AMDGPU::sub0, Src0SubRC);
3726
3727 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
Matt Arsenaultf003c382015-08-26 20:47:50 +00003728 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
3729 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
Matt Arsenault689f3252014-06-09 16:36:31 +00003730
Matt Arsenaultf003c382015-08-26 20:47:50 +00003731 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
Diana Picus116bbab2017-01-13 09:58:52 +00003732 BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
Matt Arsenault689f3252014-06-09 16:36:31 +00003733
3734 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3735 AMDGPU::sub1, Src0SubRC);
3736
Matt Arsenaultf003c382015-08-26 20:47:50 +00003737 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
Diana Picus116bbab2017-01-13 09:58:52 +00003738 BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
Matt Arsenault689f3252014-06-09 16:36:31 +00003739
Matt Arsenaultf003c382015-08-26 20:47:50 +00003740 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
Matt Arsenault689f3252014-06-09 16:36:31 +00003741 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
3742 .addReg(DestSub0)
3743 .addImm(AMDGPU::sub0)
3744 .addReg(DestSub1)
3745 .addImm(AMDGPU::sub1);
3746
3747 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
3748
Matt Arsenaultf003c382015-08-26 20:47:50 +00003749 // We don't need to legalizeOperands here because for a single operand, src0
3750 // will support any kind of input.
3751
3752 // Move all users of this moved value.
3753 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
Matt Arsenault689f3252014-06-09 16:36:31 +00003754}
3755
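// Split a 64-bit scalar binary operation (e.g. S_AND_B64) into two 32-bit VALU
// operations, one per sub0/sub1 half of each source, and recombine the results
// with a REG_SEQUENCE.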
3756void SIInstrInfo::splitScalar64BitBinaryOp(
Alfred Huang5b270722017-07-14 17:56:55 +00003757 SetVectorType &Worklist, MachineInstr &Inst,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003758 unsigned Opcode) const {
3759 MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003760 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3761
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003762 MachineOperand &Dest = Inst.getOperand(0);
3763 MachineOperand &Src0 = Inst.getOperand(1);
3764 MachineOperand &Src1 = Inst.getOperand(2);
3765 DebugLoc DL = Inst.getDebugLoc();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003766
3767 MachineBasicBlock::iterator MII = Inst;
3768
3769 const MCInstrDesc &InstDesc = get(Opcode);
Matt Arsenault684dc802014-03-24 20:08:13 +00003770 const TargetRegisterClass *Src0RC = Src0.isReg() ?
3771 MRI.getRegClass(Src0.getReg()) :
3772 &AMDGPU::SGPR_32RegClass;
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003773
Matt Arsenault684dc802014-03-24 20:08:13 +00003774 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
3775 const TargetRegisterClass *Src1RC = Src1.isReg() ?
3776 MRI.getRegClass(Src1.getReg()) :
3777 &AMDGPU::SGPR_32RegClass;
3778
3779 const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
3780
3781 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3782 AMDGPU::sub0, Src0SubRC);
3783 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
3784 AMDGPU::sub0, Src1SubRC);
3785
3786 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
Matt Arsenaultf003c382015-08-26 20:47:50 +00003787 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
3788 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
Matt Arsenault684dc802014-03-24 20:08:13 +00003789
Matt Arsenaultf003c382015-08-26 20:47:50 +00003790 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003791 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
Diana Picus116bbab2017-01-13 09:58:52 +00003792 .add(SrcReg0Sub0)
3793 .add(SrcReg1Sub0);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003794
Matt Arsenault684dc802014-03-24 20:08:13 +00003795 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3796 AMDGPU::sub1, Src0SubRC);
3797 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
3798 AMDGPU::sub1, Src1SubRC);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003799
Matt Arsenaultf003c382015-08-26 20:47:50 +00003800 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003801 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
Diana Picus116bbab2017-01-13 09:58:52 +00003802 .add(SrcReg0Sub1)
3803 .add(SrcReg1Sub1);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003804
Matt Arsenaultf003c382015-08-26 20:47:50 +00003805 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003806 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
3807 .addReg(DestSub0)
3808 .addImm(AMDGPU::sub0)
3809 .addReg(DestSub1)
3810 .addImm(AMDGPU::sub1);
3811
3812 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
3813
3814 // Try to legalize the operands in case we need to swap the order to keep it
3815 // valid.
Matt Arsenaultf003c382015-08-26 20:47:50 +00003816 legalizeOperands(LoHalf);
3817 legalizeOperands(HiHalf);
3818
3819 // Move all users of this moved value.
3820 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00003821}
3822
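// Lower S_BCNT1_I32_B64 with two V_BCNT_U32_B32 instructions: count the low
// half into MidReg, then count the high half using MidReg as the accumulate
// operand.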
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003823void SIInstrInfo::splitScalar64BitBCNT(
Alfred Huang5b270722017-07-14 17:56:55 +00003824 SetVectorType &Worklist, MachineInstr &Inst) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003825 MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenault8333e432014-06-10 19:18:24 +00003826 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3827
3828 MachineBasicBlock::iterator MII = Inst;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003829 DebugLoc DL = Inst.getDebugLoc();
Matt Arsenault8333e432014-06-10 19:18:24 +00003830
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003831 MachineOperand &Dest = Inst.getOperand(0);
3832 MachineOperand &Src = Inst.getOperand(1);
Matt Arsenault8333e432014-06-10 19:18:24 +00003833
Marek Olsakc5368502015-01-15 18:43:01 +00003834 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
Matt Arsenault8333e432014-06-10 19:18:24 +00003835 const TargetRegisterClass *SrcRC = Src.isReg() ?
3836 MRI.getRegClass(Src.getReg()) :
3837 &AMDGPU::SGPR_32RegClass;
3838
3839 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3840 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3841
3842 const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
3843
3844 MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
3845 AMDGPU::sub0, SrcSubRC);
3846 MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
3847 AMDGPU::sub1, SrcSubRC);
3848
Diana Picus116bbab2017-01-13 09:58:52 +00003849 BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0);
Matt Arsenault8333e432014-06-10 19:18:24 +00003850
Diana Picus116bbab2017-01-13 09:58:52 +00003851 BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg);
Matt Arsenault8333e432014-06-10 19:18:24 +00003852
3853 MRI.replaceRegWith(Dest.getReg(), ResultReg);
3854
Matt Arsenault5e7f95e2015-08-26 20:48:04 +00003855 // We don't need to legalize operands here. src0 for either instruction can be
3856 // an SGPR, and the second input is unused or determined here.
3857 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault8333e432014-06-10 19:18:24 +00003858}
3859
Alfred Huang5b270722017-07-14 17:56:55 +00003860void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003861 MachineInstr &Inst) const {
3862 MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenault94812212014-11-14 18:18:16 +00003863 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3864 MachineBasicBlock::iterator MII = Inst;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003865 DebugLoc DL = Inst.getDebugLoc();
Matt Arsenault94812212014-11-14 18:18:16 +00003866
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003867 MachineOperand &Dest = Inst.getOperand(0);
3868 uint32_t Imm = Inst.getOperand(2).getImm();
Matt Arsenault94812212014-11-14 18:18:16 +00003869 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
3870 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
3871
Matt Arsenault6ad34262014-11-14 18:40:49 +00003872 (void) Offset;
3873
Matt Arsenault94812212014-11-14 18:18:16 +00003874 // Only sext_inreg cases handled.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003875 assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 &&
3876 Offset == 0 && "Not implemented");
Matt Arsenault94812212014-11-14 18:18:16 +00003877
3878 if (BitWidth < 32) {
3879 unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3880 unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3881 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
3882
3883 BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003884 .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0)
3885 .addImm(0)
3886 .addImm(BitWidth);
Matt Arsenault94812212014-11-14 18:18:16 +00003887
3888 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
3889 .addImm(31)
3890 .addReg(MidRegLo);
3891
3892 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
3893 .addReg(MidRegLo)
3894 .addImm(AMDGPU::sub0)
3895 .addReg(MidRegHi)
3896 .addImm(AMDGPU::sub1);
3897
3898 MRI.replaceRegWith(Dest.getReg(), ResultReg);
Matt Arsenault445833c2015-08-26 20:47:58 +00003899 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault94812212014-11-14 18:18:16 +00003900 return;
3901 }
3902
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003903 MachineOperand &Src = Inst.getOperand(1);
Matt Arsenault94812212014-11-14 18:18:16 +00003904 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3905 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
3906
3907 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
3908 .addImm(31)
3909 .addReg(Src.getReg(), 0, AMDGPU::sub0);
3910
3911 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
3912 .addReg(Src.getReg(), 0, AMDGPU::sub0)
3913 .addImm(AMDGPU::sub0)
3914 .addReg(TmpReg)
3915 .addImm(AMDGPU::sub1);
3916
3917 MRI.replaceRegWith(Dest.getReg(), ResultReg);
Matt Arsenault445833c2015-08-26 20:47:58 +00003918 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault94812212014-11-14 18:18:16 +00003919}
3920
Matt Arsenaultf003c382015-08-26 20:47:50 +00003921void SIInstrInfo::addUsersToMoveToVALUWorklist(
3922 unsigned DstReg,
3923 MachineRegisterInfo &MRI,
Alfred Huang5b270722017-07-14 17:56:55 +00003924 SetVectorType &Worklist) const {
Matt Arsenaultf003c382015-08-26 20:47:50 +00003925 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
Matt Arsenault4c1e9ec2016-12-20 18:55:06 +00003926 E = MRI.use_end(); I != E;) {
Matt Arsenaultf003c382015-08-26 20:47:50 +00003927 MachineInstr &UseMI = *I->getParent();
3928 if (!canReadVGPR(UseMI, I.getOperandNo())) {
Alfred Huang5b270722017-07-14 17:56:55 +00003929 Worklist.insert(&UseMI);
Matt Arsenault4c1e9ec2016-12-20 18:55:06 +00003930
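// A single instruction may use DstReg in several operands; skip the remaining
// uses so the instruction is only added to the worklist once.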
3931 do {
3932 ++I;
3933 } while (I != E && I->getParent() == &UseMI);
3934 } else {
3935 ++I;
Matt Arsenaultf003c382015-08-26 20:47:50 +00003936 }
3937 }
3938}
3939
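// Expand the scalar S_PACK_* pseudos into VALU mask/shift sequences that build
// the packed 32-bit result from the 16-bit halves of the two sources.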
Alfred Huang5b270722017-07-14 17:56:55 +00003940void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003941 MachineRegisterInfo &MRI,
3942 MachineInstr &Inst) const {
3943 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3944 MachineBasicBlock *MBB = Inst.getParent();
3945 MachineOperand &Src0 = Inst.getOperand(1);
3946 MachineOperand &Src1 = Inst.getOperand(2);
3947 const DebugLoc &DL = Inst.getDebugLoc();
3948
3949 switch (Inst.getOpcode()) {
3950 case AMDGPU::S_PACK_LL_B32_B16: {
Konstantin Zhuravlyovd24aeb22017-04-13 23:17:00 +00003951 unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3952 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
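// Rough expansion (register names are illustrative):
//   v_mov_b32     v_imm, 0xffff
//   v_and_b32     v_tmp, v_imm, src0
//   v_lshl_or_b32 v_dst, src1, 16, v_tmp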
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003953
Konstantin Zhuravlyovd24aeb22017-04-13 23:17:00 +00003954 // FIXME: Can do a lot better if we know the high bits of src0 or src1 are
3955 // 0.
3956 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
3957 .addImm(0xffff);
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003958
Konstantin Zhuravlyovd24aeb22017-04-13 23:17:00 +00003959 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
3960 .addReg(ImmReg, RegState::Kill)
3961 .add(Src0);
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003962
Konstantin Zhuravlyovd24aeb22017-04-13 23:17:00 +00003963 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
3964 .add(Src1)
3965 .addImm(16)
3966 .addReg(TmpReg, RegState::Kill);
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003967 break;
3968 }
3969 case AMDGPU::S_PACK_LH_B32_B16: {
3970 unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3971 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
3972 .addImm(0xffff);
3973 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
3974 .addReg(ImmReg, RegState::Kill)
3975 .add(Src0)
3976 .add(Src1);
3977 break;
3978 }
3979 case AMDGPU::S_PACK_HH_B32_B16: {
3980 unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3981 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3982 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
3983 .addImm(16)
3984 .add(Src0);
3985 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
Konstantin Zhuravlyov88938d42017-04-21 19:35:05 +00003986 .addImm(0xffff0000);
Matt Arsenaulteb522e62017-02-27 22:15:25 +00003987 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg)
3988 .add(Src1)
3989 .addReg(ImmReg, RegState::Kill)
3990 .addReg(TmpReg, RegState::Kill);
3991 break;
3992 }
3993 default:
3994 llvm_unreachable("unhandled s_pack_* instruction");
3995 }
3996
3997 MachineOperand &Dest = Inst.getOperand(0);
3998 MRI.replaceRegWith(Dest.getReg(), ResultReg);
3999 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4000}
4001
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004002void SIInstrInfo::addSCCDefUsersToVALUWorklist(
Alfred Huang5b270722017-07-14 17:56:55 +00004003 MachineInstr &SCCDefInst, SetVectorType &Worklist) const {
Tom Stellardbc4497b2016-02-12 23:45:29 +00004004 // This assumes that all the users of SCC are in the same block
4005 // as the SCC def.
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +00004006 for (MachineInstr &MI :
Eugene Zelenko59e12822017-08-08 00:47:13 +00004007 make_range(MachineBasicBlock::iterator(SCCDefInst),
4008 SCCDefInst.getParent()->end())) {
Tom Stellardbc4497b2016-02-12 23:45:29 +00004009 // Exit if we find another SCC def.
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +00004010 if (MI.findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
Tom Stellardbc4497b2016-02-12 23:45:29 +00004011 return;
4012
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +00004013 if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
Alfred Huang5b270722017-07-14 17:56:55 +00004014 Worklist.insert(&MI);
Tom Stellardbc4497b2016-02-12 23:45:29 +00004015 }
4016}
4017
Matt Arsenaultba6aae72015-09-28 20:54:57 +00004018const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
4019 const MachineInstr &Inst) const {
4020 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
4021
4022 switch (Inst.getOpcode()) {
4023 // For target instructions, getOpRegClass just returns the virtual register
4024 // class associated with the operand, so we need to find an equivalent VGPR
4025 // register class in order to move the instruction to the VALU.
4026 case AMDGPU::COPY:
4027 case AMDGPU::PHI:
4028 case AMDGPU::REG_SEQUENCE:
4029 case AMDGPU::INSERT_SUBREG:
Connor Abbott8c217d02017-08-04 18:36:49 +00004030 case AMDGPU::WQM:
Connor Abbott92638ab2017-08-04 18:36:52 +00004031 case AMDGPU::WWM:
Matt Arsenaultba6aae72015-09-28 20:54:57 +00004032 if (RI.hasVGPRs(NewDstRC))
4033 return nullptr;
4034
4035 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
4036 if (!NewDstRC)
4037 return nullptr;
4038 return NewDstRC;
4039 default:
4040 return NewDstRC;
4041 }
4042}
4043
Matt Arsenault6c067412015-11-03 22:30:15 +00004044// Find the one SGPR operand we are allowed to use.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004045unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004046 int OpIndices[3]) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004047 const MCInstrDesc &Desc = MI.getDesc();
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004048
4049 // Find the one SGPR operand we are allowed to use.
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00004050 //
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004051 // First we need to consider the instruction's operand requirements before
4052 // legalizing. Some operands are required to be SGPRs, such as implicit uses
4053 // of VCC, but we are still bound by the constant bus requirement to only use
4054 // one.
4055 //
4056 // If the operand's class is an SGPR, we can never move it.
4057
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004058 unsigned SGPRReg = findImplicitSGPRRead(MI);
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00004059 if (SGPRReg != AMDGPU::NoRegister)
4060 return SGPRReg;
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004061
4062 unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004063 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004064
4065 for (unsigned i = 0; i < 3; ++i) {
4066 int Idx = OpIndices[i];
4067 if (Idx == -1)
4068 break;
4069
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004070 const MachineOperand &MO = MI.getOperand(Idx);
Matt Arsenault6c067412015-11-03 22:30:15 +00004071 if (!MO.isReg())
4072 continue;
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004073
Matt Arsenault6c067412015-11-03 22:30:15 +00004074 // Is this operand statically required to be an SGPR based on the operand
4075 // constraints?
4076 const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
4077 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
4078 if (IsRequiredSGPR)
4079 return MO.getReg();
4080
4081 // If this could be a VGPR or an SGPR, check the dynamic register class.
4082 unsigned Reg = MO.getReg();
4083 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
4084 if (RI.isSGPRClass(RegRC))
4085 UsedSGPRs[i] = Reg;
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004086 }
4087
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004088 // We don't have a required SGPR operand, so we have a bit more freedom in
4089 // selecting operands to move.
4090
4091 // Try to select the most used SGPR. If an SGPR is equal to one of the
4092 // others, we choose that.
4093 //
4094 // e.g.
4095 // V_FMA_F32 v0, s0, s0, s0 -> No moves
4096 // V_FMA_F32 v0, s0, s1, s0 -> Move s1
4097
Matt Arsenault6c067412015-11-03 22:30:15 +00004098 // TODO: If some of the operands are 64-bit SGPRs and some 32, we should
4099 // prefer those.
4100
Matt Arsenaultee522bf2014-09-26 17:55:06 +00004101 if (UsedSGPRs[0] != AMDGPU::NoRegister) {
4102 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
4103 SGPRReg = UsedSGPRs[0];
4104 }
4105
4106 if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
4107 if (UsedSGPRs[1] == UsedSGPRs[2])
4108 SGPRReg = UsedSGPRs[1];
4109 }
4110
4111 return SGPRReg;
4112}
4113
Tom Stellard6407e1e2014-08-01 00:32:33 +00004114MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
Matt Arsenaultace5b762014-10-17 18:00:43 +00004115 unsigned OperandName) const {
Tom Stellard1aaad692014-07-21 16:55:33 +00004116 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
4117 if (Idx == -1)
4118 return nullptr;
4119
4120 return &MI.getOperand(Idx);
4121}
Tom Stellard794c8c02014-12-02 17:05:41 +00004122
4123uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
4124 uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
Tom Stellard4694ed02015-06-26 21:58:42 +00004125 if (ST.isAmdHsaOS()) {
Marek Olsak5c7a61d2017-03-21 17:00:39 +00004126 // Set ATC = 1. GFX9 doesn't have this bit.
4127 if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS)
4128 RsrcDataFormat |= (1ULL << 56);
Tom Stellard794c8c02014-12-02 17:05:41 +00004129
Marek Olsak5c7a61d2017-03-21 17:00:39 +00004130 // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this.
4131 // Note that this also disables the TC L2 cache and therefore decreases performance.
4132 if (ST.getGeneration() == SISubtarget::VOLCANIC_ISLANDS)
Michel Danzerbeb79ce2016-03-16 09:10:35 +00004133 RsrcDataFormat |= (2ULL << 59);
Tom Stellard4694ed02015-06-26 21:58:42 +00004134 }
4135
Tom Stellard794c8c02014-12-02 17:05:41 +00004136 return RsrcDataFormat;
4137}
Marek Olsakd1a69a22015-09-29 23:37:32 +00004138
4139uint64_t SIInstrInfo::getScratchRsrcWords23() const {
4140 uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
4141 AMDGPU::RSRC_TID_ENABLE |
4142 0xffffffff; // Size;
4143
Marek Olsak5c7a61d2017-03-21 17:00:39 +00004144 // GFX9 doesn't have ELEMENT_SIZE.
4145 if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) {
4146 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
4147 Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
4148 }
Matt Arsenault24ee0782016-02-12 02:40:47 +00004149
Marek Olsak5c7a61d2017-03-21 17:00:39 +00004150 // IndexStride = 64.
4151 Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
Matt Arsenault24ee0782016-02-12 02:40:47 +00004152
Marek Olsakd1a69a22015-09-29 23:37:32 +00004153 // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
4154 // Clear them unless we want a huge stride.
Matt Arsenault43e92fe2016-06-24 06:30:11 +00004155 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
Marek Olsakd1a69a22015-09-29 23:37:32 +00004156 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
4157
4158 return Rsrc23;
4159}
Nicolai Haehnle02c32912016-01-13 16:10:10 +00004160
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004161bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const {
4162 unsigned Opc = MI.getOpcode();
Nicolai Haehnle02c32912016-01-13 16:10:10 +00004163
4164 return isSMRD(Opc);
4165}
4166
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004167bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const {
4168 unsigned Opc = MI.getOpcode();
Nicolai Haehnle02c32912016-01-13 16:10:10 +00004169
4170 return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
4171}
Tom Stellard2ff72622016-01-28 16:04:37 +00004172
Matt Arsenault3354f422016-09-10 01:20:33 +00004173unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
4174 int &FrameIndex) const {
4175 const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
4176 if (!Addr || !Addr->isFI())
4177 return AMDGPU::NoRegister;
4178
4179 assert(!MI.memoperands_empty() &&
Yaxun Liu1a14bfa2017-03-27 14:04:01 +00004180 (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS);
Matt Arsenault3354f422016-09-10 01:20:33 +00004181
4182 FrameIndex = Addr->getIndex();
4183 return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
4184}
4185
4186unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI,
4187 int &FrameIndex) const {
4188 const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr);
4189 assert(Addr && Addr->isFI());
4190 FrameIndex = Addr->getIndex();
4191 return getNamedOperand(MI, AMDGPU::OpName::data)->getReg();
4192}
4193
4194unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
4195 int &FrameIndex) const {
Matt Arsenault3354f422016-09-10 01:20:33 +00004196 if (!MI.mayLoad())
4197 return AMDGPU::NoRegister;
4198
4199 if (isMUBUF(MI) || isVGPRSpill(MI))
4200 return isStackAccess(MI, FrameIndex);
4201
4202 if (isSGPRSpill(MI))
4203 return isSGPRStackAccess(MI, FrameIndex);
4204
4205 return AMDGPU::NoRegister;
4206}
4207
4208unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
4209 int &FrameIndex) const {
4210 if (!MI.mayStore())
4211 return AMDGPU::NoRegister;
4212
4213 if (isMUBUF(MI) || isVGPRSpill(MI))
4214 return isStackAccess(MI, FrameIndex);
4215
4216 if (isSGPRSpill(MI))
4217 return isSGPRStackAccess(MI, FrameIndex);
4218
4219 return AMDGPU::NoRegister;
4220}
4221
Matt Arsenault02458c22016-06-06 20:10:33 +00004222unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
4223 unsigned Opc = MI.getOpcode();
4224 const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc);
4225 unsigned DescSize = Desc.getSize();
4226
4227 // If we have a definitive size, we can use it. Otherwise we need to inspect
4228 // the operands to know the size.
Matt Arsenault2d8c2892016-11-01 20:42:24 +00004229 //
4230 // FIXME: Instructions that have a base 32-bit encoding report their size as
4231 // 4, even though they are really 8 bytes if they have a literal operand.
4232 if (DescSize != 0 && DescSize != 4)
Matt Arsenault02458c22016-06-06 20:10:33 +00004233 return DescSize;
4234
Matt Arsenault02458c22016-06-06 20:10:33 +00004235 // 4-byte instructions may have a 32-bit literal encoded after them. Check
4236 // operands that could ever be literals.
4237 if (isVALU(MI) || isSALU(MI)) {
Matt Arsenaultb8f8dbc2017-03-24 19:52:05 +00004238 if (isFixedSize(MI))
Matt Arsenault2d8c2892016-11-01 20:42:24 +00004239 return DescSize;
Matt Arsenault2d8c2892016-11-01 20:42:24 +00004240
Matt Arsenault02458c22016-06-06 20:10:33 +00004241 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4242 if (Src0Idx == -1)
4243 return 4; // No operands.
4244
Matt Arsenault4bd72362016-12-10 00:39:12 +00004245 if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
Matt Arsenault02458c22016-06-06 20:10:33 +00004246 return 8;
4247
4248 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4249 if (Src1Idx == -1)
4250 return 4;
4251
Matt Arsenault4bd72362016-12-10 00:39:12 +00004252 if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
Matt Arsenault02458c22016-06-06 20:10:33 +00004253 return 8;
4254
4255 return 4;
4256 }
4257
Matt Arsenault2d8c2892016-11-01 20:42:24 +00004258 if (DescSize == 4)
4259 return 4;
4260
Matt Arsenault02458c22016-06-06 20:10:33 +00004261 switch (Opc) {
4262 case TargetOpcode::IMPLICIT_DEF:
4263 case TargetOpcode::KILL:
4264 case TargetOpcode::DBG_VALUE:
4265 case TargetOpcode::BUNDLE:
4266 case TargetOpcode::EH_LABEL:
4267 return 0;
4268 case TargetOpcode::INLINEASM: {
4269 const MachineFunction *MF = MI.getParent()->getParent();
4270 const char *AsmStr = MI.getOperand(0).getSymbolName();
4271 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
4272 }
4273 default:
4274 llvm_unreachable("unable to find instruction size");
4275 }
4276}
4277
Tom Stellard6695ba02016-10-28 23:53:48 +00004278bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
4279 if (!isFLAT(MI))
4280 return false;
4281
4282 if (MI.memoperands_empty())
4283 return true;
4284
4285 for (const MachineMemOperand *MMO : MI.memoperands()) {
Yaxun Liu1a14bfa2017-03-27 14:04:01 +00004286 if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS)
Tom Stellard6695ba02016-10-28 23:53:48 +00004287 return true;
4288 }
4289 return false;
4290}
4291
Jan Sjodina06bfe02017-05-15 20:18:37 +00004292bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const {
4293 return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
4294}
4295
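// Replace the SI_NON_UNIFORM_BRCOND_PSEUDO terminating a divergent if-region
// with an SI_IF in the entry block and an SI_END_CF in the merge block.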
4296void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
4297 MachineBasicBlock *IfEnd) const {
4298 MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator();
4299 assert(TI != IfEntry->end());
4300
4301 MachineInstr *Branch = &(*TI);
4302 MachineFunction *MF = IfEntry->getParent();
4303 MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo();
4304
4305 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
4306 unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4307 MachineInstr *SIIF =
4308 BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
4309 .add(Branch->getOperand(0))
4310 .add(Branch->getOperand(1));
4311 MachineInstr *SIEND =
4312 BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF))
4313 .addReg(DstReg);
4314
4315 IfEntry->erase(TI);
4316 IfEntry->insert(IfEntry->end(), SIIF);
4317 IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND);
4318 }
4319}
4320
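// Rewrite a divergent loop: build a PHI of the mask register in the loop
// header, then replace the non-uniform backedge branch with SI_IF_BREAK and
// SI_LOOP at the end of the loop.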
4321void SIInstrInfo::convertNonUniformLoopRegion(
4322 MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const {
4323 MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator();
4324 // We expect 2 terminators, one conditional and one unconditional.
4325 assert(TI != LoopEnd->end());
4326
4327 MachineInstr *Branch = &(*TI);
4328 MachineFunction *MF = LoopEnd->getParent();
4329 MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo();
4330
4331 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
4332
4333 unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4334 unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4335 MachineInstrBuilder HeaderPHIBuilder =
4336 BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
4337 for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
4338 E = LoopEntry->pred_end();
4339 PI != E; ++PI) {
4340 if (*PI == LoopEnd) {
4341 HeaderPHIBuilder.addReg(BackEdgeReg);
4342 } else {
4343 MachineBasicBlock *PMBB = *PI;
4344 unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4345 materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
4346 ZeroReg, 0);
4347 HeaderPHIBuilder.addReg(ZeroReg);
4348 }
4349 HeaderPHIBuilder.addMBB(*PI);
4350 }
4351 MachineInstr *HeaderPhi = HeaderPHIBuilder;
4352 MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(),
4353 get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
4354 .addReg(DstReg)
4355 .add(Branch->getOperand(0));
4356 MachineInstr *SILOOP =
4357 BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP))
4358 .addReg(BackEdgeReg)
4359 .addMBB(LoopEntry);
4360
4361 LoopEntry->insert(LoopEntry->begin(), HeaderPhi);
4362 LoopEnd->erase(TI);
4363 LoopEnd->insert(LoopEnd->end(), SIIFBREAK);
4364 LoopEnd->insert(LoopEnd->end(), SILOOP);
4365 }
4366}
4367
Tom Stellard2ff72622016-01-28 16:04:37 +00004368ArrayRef<std::pair<int, const char *>>
4369SIInstrInfo::getSerializableTargetIndices() const {
4370 static const std::pair<int, const char *> TargetIndices[] = {
4371 {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
4372 {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
4373 {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
4374 {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
4375 {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
4376 return makeArrayRef(TargetIndices);
4377}
Tom Stellardcb6ba622016-04-30 00:23:06 +00004378
4379/// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The
4380/// post-RA version of misched uses CreateTargetMIHazardRecognizer.
4381ScheduleHazardRecognizer *
4382SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
4383 const ScheduleDAG *DAG) const {
4384 return new GCNHazardRecognizer(DAG->MF);
4385}
4386
4387/// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
4388/// pass.
4389ScheduleHazardRecognizer *
4390SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
4391 return new GCNHazardRecognizer(MF);
4392}
Stanislav Mekhanoshin6ec3e3a2017-01-20 00:44:31 +00004393
Matt Arsenault3f031e72017-07-02 23:21:48 +00004394std::pair<unsigned, unsigned>
4395SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4396 return std::make_pair(TF & MO_MASK, TF & ~MO_MASK);
4397}
4398
4399ArrayRef<std::pair<unsigned, const char *>>
4400SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4401 static const std::pair<unsigned, const char *> TargetFlags[] = {
4402 { MO_GOTPCREL, "amdgpu-gotprel" },
4403 { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" },
4404 { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" },
4405 { MO_REL32_LO, "amdgpu-rel32-lo" },
4406 { MO_REL32_HI, "amdgpu-rel32-hi" }
4407 };
4408
4409 return makeArrayRef(TargetFlags);
4410}
4411
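// Instructions at the top of a block that set up the exec mask (other than
// terminators and plain copies) are treated as part of the block prologue.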
Stanislav Mekhanoshin6ec3e3a2017-01-20 00:44:31 +00004412bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
4413 return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
4414 MI.modifiesRegister(AMDGPU::EXEC, &RI);
4415}
Stanislav Mekhanoshin86b0a542017-04-14 00:33:44 +00004416
4417MachineInstrBuilder
4418SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
4419 MachineBasicBlock::iterator I,
4420 const DebugLoc &DL,
4421 unsigned DestReg) const {
4422 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4423
4424 unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4425
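// V_ADD_I32_e64 requires a carry-out operand; define it as dead since callers
// only want the 32-bit result. The source operands are appended by the caller.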
4426 return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
4427 .addReg(UnusedCarry, RegState::Define | RegState::Dead);
4428}