//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//


#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;

SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
  : AMDGPUInstrInfo(st),
    RI(st) { }

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

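/// Decompose a simple DS, MUBUF/MTBUF or SMRD load/store into a base register
/// and an immediate offset. Returns false for forms this function does not
/// handle, such as the two-offset DS instructions or MUBUF accesses that use
/// a separate soffset operand.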
bool SIInstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt,
                                       unsigned &BaseReg, unsigned &Offset,
                                       const TargetRegisterInfo *TRI) const {
  unsigned Opc = LdSt->getOpcode();
  if (isDS(Opc)) {

    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);

    if (!OffsetImm) {
      // The 2 offset instructions use offset0 and offset1 instead. This
      // function only handles simple instructions with only a single offset,
      // so we ignore them.

      // TODO: Handle consecutive offsets as a single load.
      return false;
    }

    const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                    AMDGPU::OpName::addr);

    BaseReg = AddrReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  if (isMUBUF(Opc) || isMTBUF(Opc)) {
    if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
      return false;

    const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                    AMDGPU::OpName::vaddr);
    if (!AddrReg)
      return false;

    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    BaseReg = AddrReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  if (isSMRD(Opc)) {
    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    if (!OffsetImm)
      return false;

    const MachineOperand *SBaseReg = getNamedOperand(*LdSt,
                                                     AMDGPU::OpName::sbase);
    BaseReg = SBaseReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  return false;
}

void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MI, DebugLoc DL,
                         unsigned DestReg, unsigned SrcReg,
                         bool KillSrc) const {

  // If we are trying to copy to or from SCC, there is a bug somewhere else in
  // the backend. While it may be theoretically possible to do this, it should
  // never be necessary.
  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
  };

  static const int16_t Sub0_7[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
  };

  static const int16_t Sub0_3[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
  };

  static const int16_t Sub0_2[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
  };

  static const int16_t Sub0_1[] = {
    AMDGPU::sub0, AMDGPU::sub1, 0
  };

  unsigned Opcode;
  const int16_t *SubIndices;

  if (AMDGPU::M0 == DestReg) {
    // Check if M0 isn't already set to this value
    for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
         I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {

      if (!I->definesRegister(AMDGPU::M0))
        continue;

      unsigned Opc = I->getOpcode();
      if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
        break;

      if (!I->readsRegister(SrcReg))
        break;

      // The copy isn't necessary
      return;
    }
  }

  if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_15;

  } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
           AMDGPU::SReg_64RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_1;

  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_2;

  } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
           AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
           AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
           AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_15;

  } else {
    llvm_unreachable("Can't copy register!");
  }

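  // Wide copies are expanded into one 32-bit move per sub-register. Each move
  // except the last also implicitly defines the full destination register so
  // that liveness of DestReg is modelled correctly across the sequence.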
  while (unsigned SubIdx = *SubIndices++) {
    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));

    if (*SubIndices)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
  }
}

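/// Return the opcode produced by swapping the two source operands of
/// \p Opcode, using the TableGen-generated commute tables. Returns \p Opcode
/// unchanged if no commuted form is recorded.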
unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
  int NewOpc;

  // Try to map original to commuted opcode
  if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
    return NewOpc;

  // Try to map commuted to original opcode
  if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
    return NewOpc;

  return Opcode;
}

void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned KillFlag = isKill ? RegState::Kill : 0;

  if (RI.hasVGPRs(RC)) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!");
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
            .addReg(SrcReg);
  } else if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
    unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MF);
    unsigned TgtReg = MFI->SpillTracker.LaneVGPR;

    BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), TgtReg)
            .addReg(SrcReg, KillFlag)
            .addImm(Lane);
    MFI->SpillTracker.addSpilledReg(FrameIndex, TgtReg, Lane);
  } else if (RI.isSGPRClass(RC)) {
    // We are only allowed to create one new instruction when spilling
    // registers, so we need to use a pseudo instruction for wide
    // (multi-dword) SGPR registers.
    //
    // Reserve a spot in the spill tracker for each sub-register of
    // the wide register.
    unsigned NumSubRegs = RC->getSize() / 4;
    unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MF, NumSubRegs);
    MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
                                    FirstLane);

    unsigned Opcode;
    switch (RC->getSize() * 8) {
    case 64:  Opcode = AMDGPU::SI_SPILL_S64_SAVE; break;
    case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
    case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
    case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
    default: llvm_unreachable("Cannot spill register class");
    }

    BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR)
            .addReg(SrcReg)
            .addImm(FrameIndex);
  } else {
    llvm_unreachable("VGPR spilling not supported");
  }
}

void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned DestReg, int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  DebugLoc DL = MBB.findDebugLoc(MI);

  if (RI.hasVGPRs(RC)) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Can't retrieve spilled VGPR!");
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
            .addImm(0);
  } else if (RI.isSGPRClass(RC)) {
    unsigned Opcode;
    switch (RC->getSize() * 8) {
    case 32:  Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
    case 64:  Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
    case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
    case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
    case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
    default: llvm_unreachable("Cannot spill register class");
    }

    SIMachineFunctionInfo::SpilledReg Spill =
        MFI->SpillTracker.getSpilledReg(FrameIndex);

    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
            .addReg(Spill.VGPR)
            .addImm(FrameIndex);
  } else {
    llvm_unreachable("VGPR spilling not supported");
  }
}

static unsigned getNumSubRegsForSpillOp(unsigned Op) {

  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}

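/// Insert \p Count wait states before \p MI. Each S_NOP encodes up to eight
/// NOPs: its immediate operand is the number of NOPs minus one.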
void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
                             int Count) const {
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
            .addImm(Arg);
  }
}

bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  SIMachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MBB.findDebugLoc(MI);
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);

  // SGPR register spill
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
    unsigned FrameIndex = MI->getOperand(2).getImm();

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      SIMachineFunctionInfo::SpilledReg Spill;
      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(),
                                            &AMDGPU::SGPR_32RegClass, i);
      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);

      BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
              MI->getOperand(0).getReg())
              .addReg(SubReg)
              .addImm(Spill.Lane + i);
    }
    MI->eraseFromParent();
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      SIMachineFunctionInfo::SpilledReg Spill;
      unsigned FrameIndex = MI->getOperand(2).getImm();
      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(0).getReg(),
                                            &AMDGPU::SGPR_32RegClass, i);
      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);

      BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg)
              .addReg(MI->getOperand(1).getReg())
              .addImm(Spill.Lane + i);
    }
    insertNOPs(MI, 3);
    MI->eraseFromParent();
    break;
  }
  case AMDGPU::SI_CONSTDATA_PTR: {
    unsigned Reg = MI->getOperand(0).getReg();
    unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg);

    // Add 32-bit offset from this instruction to the start of the constant data.
    BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_I32), RegLo)
            .addReg(RegLo)
            .addTargetIndex(AMDGPU::TI_CONSTDATA_START)
            .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit);
    BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi)
            .addReg(RegHi)
            .addImm(0)
            .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit)
            .addReg(AMDGPU::SCC, RegState::Implicit);
    MI->eraseFromParent();
    break;
  }
  }
  return true;
}

MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
                                              bool NewMI) const {

  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
    return nullptr;

  // Cannot commute VOP2 if src0 is SGPR.
  if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
      RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
    return nullptr;

  if (!MI->getOperand(2).isReg()) {
    // XXX: Commute instructions with FPImm operands
    if (NewMI || MI->getOperand(2).isFPImm() ||
        (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
      return nullptr;
    }

    // XXX: Commute VOP3 instructions with abs and neg set.
    if (isVOP3(MI->getOpcode()) &&
        (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                        AMDGPU::OpName::abs)).getImm() ||
         MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                        AMDGPU::OpName::neg)).getImm()))
      return nullptr;

    unsigned Reg = MI->getOperand(1).getReg();
    unsigned SubReg = MI->getOperand(1).getSubReg();
    MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
    MI->getOperand(2).ChangeToRegister(Reg, false);
    MI->getOperand(2).setSubReg(SubReg);
  } else {
    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
  }

  if (MI)
    MI->setDesc(get(commuteOpcode(MI->getOpcode())));

  return MI;
}

MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         unsigned SrcReg) const {
  return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
                 DstReg).addReg(SrcReg);
}

bool SIInstrInfo::isMov(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
    return true;
  }
}

bool
SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  return RC != &AMDGPU::EXECRegRegClass;
}

bool
SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
                                         AliasAnalysis *AA) const {
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
    return MI->getOperand(1).isImm();
  }
}

namespace llvm {
namespace AMDGPU {
// Helper function generated by tablegen. We are wrapping this with
// an SIInstrInfo function that returns bool rather than int.
int isDS(uint16_t Opcode);
}
}

bool SIInstrInfo::isDS(uint16_t Opcode) const {
  return ::AMDGPU::isDS(Opcode) != -1;
}

bool SIInstrInfo::isMIMG(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::MIMG;
}

bool SIInstrInfo::isSMRD(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::SMRD;
}

bool SIInstrInfo::isMUBUF(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
}

bool SIInstrInfo::isMTBUF(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
}

bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP1;
}

bool SIInstrInfo::isVOP2(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP2;
}

bool SIInstrInfo::isVOP3(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP3;
}

bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOPC;
}

bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
}

bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
  int32_t Val = Imm.getSExtValue();
  if (Val >= -16 && Val <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  return (APInt::floatToBits(0.0f) == Imm) ||
         (APInt::floatToBits(1.0f) == Imm) ||
         (APInt::floatToBits(-1.0f) == Imm) ||
         (APInt::floatToBits(0.5f) == Imm) ||
         (APInt::floatToBits(-0.5f) == Imm) ||
         (APInt::floatToBits(2.0f) == Imm) ||
         (APInt::floatToBits(-2.0f) == Imm) ||
         (APInt::floatToBits(4.0f) == Imm) ||
         (APInt::floatToBits(-4.0f) == Imm);
}

bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
  if (MO.isImm())
    return isInlineConstant(APInt(32, MO.getImm(), true));

  if (MO.isFPImm()) {
    APFloat FpImm = MO.getFPImm()->getValueAPF();
    return isInlineConstant(FpImm.bitcastToAPInt());
  }

  return false;
}

bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
  return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
}

static bool compareMachineOp(const MachineOperand &Op0,
                             const MachineOperand &Op1) {
  if (Op0.getType() != Op1.getType())
    return false;

  switch (Op0.getType()) {
  case MachineOperand::MO_Register:
    return Op0.getReg() == Op1.getReg();
  case MachineOperand::MO_Immediate:
    return Op0.getImm() == Op1.getImm();
  case MachineOperand::MO_FPImmediate:
    return Op0.getFPImm() == Op1.getFPImm();
  default:
    llvm_unreachable("Didn't expect to be comparing these operand types");
  }
}

bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
                                    const MachineOperand &MO) const {
  const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];

  assert(MO.isImm() || MO.isFPImm());

  if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
    return true;

  if (OpInfo.RegClass < 0)
    return false;

  return RI.regClassCanUseImmediate(OpInfo.RegClass);
}

bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
                                    StringRef &ErrInfo) const {
  uint16_t Opcode = MI->getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  // Make sure the number of operands is correct.
  const MCInstrDesc &Desc = get(Opcode);
  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI->getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
    return false;
  }

  // Make sure the register classes are correct.
  for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
    switch (Desc.OpInfo[i].OperandType) {
    case MCOI::OPERAND_REGISTER: {
      int RegClass = Desc.OpInfo[i].RegClass;
      if (!RI.regClassCanUseImmediate(RegClass) &&
          (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) {
        ErrInfo = "Expected register, but got immediate";
        return false;
      }
    }
      break;
    case MCOI::OPERAND_IMMEDIATE:
      // Check if this operand is an immediate.
      // FrameIndex operands will be replaced by immediates, so they are
      // allowed.
      if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm() &&
          !MI->getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";
        return false;
      }
      // Fall-through
    default:
      continue;
    }

    if (!MI->getOperand(i).isReg())
      continue;

    int RegClass = Desc.OpInfo[i].RegClass;
    if (RegClass != -1) {
      unsigned Reg = MI->getOperand(i).getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg))
        continue;

      const TargetRegisterClass *RC = RI.getRegClass(RegClass);
      if (!RC->contains(Reg)) {
        ErrInfo = "Operand has incorrect register class.";
        return false;
      }
    }
  }

  // Verify VOP*
  if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
    unsigned ConstantBusCount = 0;
    unsigned SGPRUsed = AMDGPU::NoRegister;
    for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isUse() &&
          !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {

        // EXEC register uses the constant bus.
        if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
          ++ConstantBusCount;

        // SGPRs use the constant bus.
        if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
            (!MO.isImplicit() &&
             (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
              AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
          if (SGPRUsed != MO.getReg()) {
            ++ConstantBusCount;
            SGPRUsed = MO.getReg();
          }
        }
      }
      // Literal constants use the constant bus.
      if (isLiteralConstant(MO))
        ++ConstantBusCount;
    }
    if (ConstantBusCount > 1) {
      ErrInfo = "VOP* instruction uses the constant bus more than once";
      return false;
    }
  }

  // Verify SRC1 for VOP2 and VOPC.
  if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
    const MachineOperand &Src1 = MI->getOperand(Src1Idx);
    if (Src1.isImm() || Src1.isFPImm()) {
      ErrInfo = "VOP[2C] src1 cannot be an immediate.";
      return false;
    }
  }

  // Verify VOP3.
  if (isVOP3(Opcode)) {
    if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
      ErrInfo = "VOP3 src0 cannot be a literal constant.";
      return false;
    }
    if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
      ErrInfo = "VOP3 src1 cannot be a literal constant.";
      return false;
    }
    if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
      ErrInfo = "VOP3 src2 cannot be a literal constant.";
      return false;
    }
  }

  // Verify misc. restrictions on specific instructions.
  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
    const MachineOperand &Src0 = MI->getOperand(2);
    const MachineOperand &Src1 = MI->getOperand(3);
    const MachineOperand &Src2 = MI->getOperand(4);
    if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
      if (!compareMachineOp(Src0, Src1) &&
          !compareMachineOp(Src0, Src2)) {
        ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
        return false;
      }
    }
  }

  return true;
}

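/// Return the VALU opcode that should be used when moving \p MI off the SALU,
/// or AMDGPU::INSTRUCTION_LIST_END if there is no VALU equivalent.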
unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::S_MOV_B32:
    return MI.getOperand(1).isReg() ?
           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
  case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
  case AMDGPU::S_LOAD_DWORD_IMM:
  case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
  case AMDGPU::S_LOAD_DWORDX2_IMM:
  case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
  case AMDGPU::S_LOAD_DWORDX4_IMM:
  case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  }
}

bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
  return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
}

const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
                                                      unsigned OpNo) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  const MCInstrDesc &Desc = get(MI.getOpcode());
  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
      Desc.OpInfo[OpNo].RegClass == -1)
    return MRI.getRegClass(MI.getOperand(OpNo).getReg());

  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return RI.getRegClass(RCID);
}

bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::PHI:
  case AMDGPU::INSERT_SUBREG:
    return RI.hasVGPRs(getOpRegClass(MI, 0));
  default:
    return RI.hasVGPRs(getOpRegClass(MI, OpNo));
  }
}

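/// Legalize the operand at \p OpIdx by moving its value into a newly created
/// virtual register of a VGPR-compatible class and rewriting the operand to
/// use that register.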
void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
  MachineBasicBlock::iterator I = MI;
  MachineOperand &MO = MI->getOperand(OpIdx);
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
  const TargetRegisterClass *RC = RI.getRegClass(RCID);
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (MO.isReg()) {
    Opcode = AMDGPU::COPY;
  } else if (RI.isSGPRClass(RC)) {
    Opcode = AMDGPU::S_MOV_B32;
  }

  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
  unsigned Reg = MRI.createVirtualRegister(VRC);
  BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
          Reg).addOperand(MO);
  MO.ChangeToRegister(Reg, false);
}

unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC)
                                         const {
  assert(SuperReg.isReg());

  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
  unsigned SubReg = MRI.createVirtualRegister(SubRC);

  // Just in case the super register is itself a sub-register, copy it to a new
  // value so we don't need to worry about merging its subreg index with the
  // SubIdx passed to this function. The register coalescer should be able to
  // eliminate this extra copy.
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
          NewSuperReg)
          .addOperand(SuperReg);

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
          SubReg)
          .addReg(NewSuperReg, 0, SubIdx);
  return SubReg;
}

MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
  MachineBasicBlock::iterator MII,
  MachineRegisterInfo &MRI,
  MachineOperand &Op,
  const TargetRegisterClass *SuperRC,
  unsigned SubIdx,
  const TargetRegisterClass *SubRC) const {
  if (Op.isImm()) {
    // XXX - Is there a better way to do this?
    if (SubIdx == AMDGPU::sub0)
      return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
    if (SubIdx == AMDGPU::sub1)
      return MachineOperand::CreateImm(Op.getImm() >> 32);

    llvm_unreachable("Unhandled register index for immediate");
  }

  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
                                       SubIdx, SubRC);
  return MachineOperand::CreateReg(SubReg, false);
}

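/// Split a 64-bit immediate move into two S_MOV_B32s feeding a REG_SEQUENCE
/// and return the register holding the combined 64-bit value. The two new
/// moves are pushed onto \p Worklist for further processing.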
unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
                                    MachineBasicBlock::iterator MI,
                                    MachineRegisterInfo &MRI,
                                    const TargetRegisterClass *RC,
                                    const MachineOperand &Op) const {
  MachineBasicBlock *MBB = MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned Dst = MRI.createVirtualRegister(RC);

  MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
                             LoDst)
                     .addImm(Op.getImm() & 0xFFFFFFFF);
  MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
                             HiDst)
                     .addImm(Op.getImm() >> 32);

  BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
          .addReg(LoDst)
          .addImm(AMDGPU::sub0)
          .addReg(HiDst)
          .addImm(AMDGPU::sub1);

  Worklist.push_back(Lo);
  Worklist.push_back(Hi);

  return Dst;
}

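/// Rewrite the operands of \p MI until they satisfy the constraints of its
/// opcode: enforce the VOP2/VOP3 SGPR and constant-bus rules, give
/// REG_SEQUENCE, PHI and INSERT_SUBREG operands a consistent register class,
/// and rebuild the vaddr/srsrc operands of MUBUF accesses that have the
/// wrong register class.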
void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src2);

  // Legalize VOP2
  if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
    MachineOperand &Src0 = MI->getOperand(Src0Idx);
    MachineOperand &Src1 = MI->getOperand(Src1Idx);

    // If the instruction implicitly reads VCC, we can't have any SGPR
    // operands, so move any that are present to VGPRs.
    bool ReadsVCC = MI->readsRegister(AMDGPU::VCC, &RI);
    if (ReadsVCC && Src0.isReg() &&
        RI.isSGPRClass(MRI.getRegClass(Src0.getReg()))) {
      legalizeOpWithMove(MI, Src0Idx);
      return;
    }

    if (ReadsVCC && Src1.isReg() &&
        RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
      legalizeOpWithMove(MI, Src1Idx);
      return;
    }

    // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
    // be the first operand, and there can only be one.
    if (Src1.isImm() || Src1.isFPImm() ||
        (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
      if (MI->isCommutable()) {
        if (commuteInstruction(MI))
          return;
      }
      legalizeOpWithMove(MI, Src1Idx);
    }
  }

  // XXX - Do any VOP3 instructions read VCC?
  // Legalize VOP3
  if (isVOP3(MI->getOpcode())) {
    int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
    unsigned SGPRReg = AMDGPU::NoRegister;
    for (unsigned i = 0; i < 3; ++i) {
      int Idx = VOP3Idx[i];
      if (Idx == -1)
        continue;
      MachineOperand &MO = MI->getOperand(Idx);

      if (MO.isReg()) {
        if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
          continue; // VGPRs are legal

        assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");

        if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
          SGPRReg = MO.getReg();
          // We can use one SGPR in each VOP3 instruction.
          continue;
        }
      } else if (!isLiteralConstant(MO)) {
        // If it is not a register and not a literal constant, then it must be
        // an inline constant which is always legal.
        continue;
      }
      // If we make it this far, then the operand is not legal and we must
      // legalize it.
      legalizeOpWithMove(MI, Idx);
    }
  }

  // Legalize REG_SEQUENCE and PHI
  // The register class of the operands must be the same type as the register
  // class of the output.
  if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
      MI->getOpcode() == AMDGPU::PHI) {
    const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      const TargetRegisterClass *OpRC =
          MRI.getRegClass(MI->getOperand(i).getReg());
      if (RI.hasVGPRs(OpRC)) {
        VRC = OpRC;
      } else {
        SRC = OpRC;
      }
    }

    // If any of the operands are VGPR registers, then they all must be;
    // otherwise we will create illegal VGPR->SGPR copies when legalizing
    // them.
    if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
      if (!VRC) {
        assert(SRC);
        VRC = RI.getEquivalentVGPRClass(SRC);
      }
      RC = VRC;
    } else {
      RC = SRC;
    }

    // Update all the operands so they have the same type.
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      unsigned DstReg = MRI.createVirtualRegister(RC);
      MachineBasicBlock *InsertBB;
      MachineBasicBlock::iterator Insert;
      if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
        InsertBB = MI->getParent();
        Insert = MI;
      } else {
        // MI is a PHI instruction.
        InsertBB = MI->getOperand(i + 1).getMBB();
        Insert = InsertBB->getFirstTerminator();
      }
      BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
              get(AMDGPU::COPY), DstReg)
              .addOperand(MI->getOperand(i));
      MI->getOperand(i).setReg(DstReg);
    }
  }

  // Legalize INSERT_SUBREG
  // src0 must have the same register class as dst
  if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned Src0 = MI->getOperand(1).getReg();
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
    if (DstRC != Src0RC) {
      MachineBasicBlock &MBB = *MI->getParent();
      unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
              .addReg(Src0);
      MI->getOperand(1).setReg(NewSrc0);
    }
    return;
  }

  // Legalize MUBUF* instructions
  // FIXME: If we start using the non-addr64 instructions for compute, we
  // may need to legalize them here.

  int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::srsrc);
  int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::vaddr);
  if (SRsrcIdx != -1 && VAddrIdx != -1) {
    const TargetRegisterClass *VAddrRC =
        RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);

    if (VAddrRC->getSize() == 8 &&
        MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
      // We have a MUBUF instruction that uses a 64-bit vaddr register and
      // srsrc has the incorrect register class. In order to fix this, we
      // need to extract the pointer from the resource descriptor (srsrc),
      // add it to the value of vaddr, then store the result in the vaddr
      // operand. Then, we need to set the pointer field of the resource
      // descriptor to zero.

      MachineBasicBlock &MBB = *MI->getParent();
      MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
      MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
      unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
      unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);

      // SRsrcPtrLo = srsrc:sub0
      SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
          &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);

      // SRsrcPtrHi = srsrc:sub1
      SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
          &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);

      // VAddrLo = vaddr:sub0
      VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
          &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);

      // VAddrHi = vaddr:sub1
      VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
          &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);

      // NewVaddrLo = SRsrcPtrLo + VAddrLo
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
              NewVAddrLo)
              .addReg(SRsrcPtrLo)
              .addReg(VAddrLo)
              .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);

      // NewVaddrHi = SRsrcPtrHi + VAddrHi
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
              NewVAddrHi)
              .addReg(SRsrcPtrHi)
              .addReg(VAddrHi)
              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
              .addReg(AMDGPU::VCC, RegState::Implicit);

      // NewVaddr = {NewVaddrHi, NewVaddrLo}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
              NewVAddr)
              .addReg(NewVAddrLo)
              .addImm(AMDGPU::sub0)
              .addReg(NewVAddrHi)
              .addImm(AMDGPU::sub1);

      // Zero64 = 0
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
              Zero64)
              .addImm(0);

      // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
              SRsrcFormatLo)
              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);

      // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
              SRsrcFormatHi)
              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);

      // NewSRsrc = {Zero64, SRsrcFormat}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
              NewSRsrc)
              .addReg(Zero64)
              .addImm(AMDGPU::sub0_sub1)
              .addReg(SRsrcFormatLo)
              .addImm(AMDGPU::sub2)
              .addReg(SRsrcFormatHi)
              .addImm(AMDGPU::sub3);

      // Update the instruction to use NewVaddr
      MI->getOperand(VAddrIdx).setReg(NewVAddr);
      // Update the instruction to use NewSRsrc
      MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
    }
  }
}

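/// Replace a scalar memory load (SMRD) with the equivalent MUBUF (ADDR64)
/// load: build a buffer resource descriptor whose base holds the register
/// offset and whose upper dwords hold RSRC_DATA_FORMAT, move the SMRD base
/// pointer into the 64-bit vaddr operand, and rescale the dword-based SMRD
/// immediate offset to the byte offset MUBUF expects.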
void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI,
                                 MachineRegisterInfo &MRI) const {
  MachineBasicBlock *MBB = MI->getParent();
  switch (MI->getOpcode()) {
  case AMDGPU::S_LOAD_DWORD_IMM:
  case AMDGPU::S_LOAD_DWORD_SGPR:
  case AMDGPU::S_LOAD_DWORDX2_IMM:
  case AMDGPU::S_LOAD_DWORDX2_SGPR:
  case AMDGPU::S_LOAD_DWORDX4_IMM:
  case AMDGPU::S_LOAD_DWORDX4_SGPR:
    unsigned NewOpcode = getVALUOp(*MI);
    unsigned RegOffset;
    unsigned ImmOffset;

    if (MI->getOperand(2).isReg()) {
      RegOffset = MI->getOperand(2).getReg();
      ImmOffset = 0;
    } else {
      assert(MI->getOperand(2).isImm());
      // SMRD instructions take a dword offset and MUBUF instructions
      // take a byte offset.
      ImmOffset = MI->getOperand(2).getImm() << 2;
      RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      if (isUInt<12>(ImmOffset)) {
        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
                RegOffset)
                .addImm(0);
      } else {
        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
                RegOffset)
                .addImm(ImmOffset);
        ImmOffset = 0;
      }
    }

    unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
    unsigned DWord0 = RegOffset;
    unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
            .addImm(0);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
            .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
            .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
            .addReg(DWord0)
            .addImm(AMDGPU::sub0)
            .addReg(DWord1)
            .addImm(AMDGPU::sub1)
            .addReg(DWord2)
            .addImm(AMDGPU::sub2)
            .addReg(DWord3)
            .addImm(AMDGPU::sub3);
    MI->setDesc(get(NewOpcode));
    if (MI->getOperand(2).isReg()) {
      MI->getOperand(2).setReg(MI->getOperand(1).getReg());
    } else {
      MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
    }
    MI->getOperand(1).setReg(SRsrc);
    MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
  }
}

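/// Move \p TopInst from the SALU to its VALU equivalent. A worklist is used
/// so that any instructions created while lowering (e.g. the pieces of a
/// split 64-bit operation) are processed as well; instructions without a
/// VALU equivalent only have their operands legalized.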
Tom Stellard82166022013-11-13 23:36:37 +00001278void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
1279 SmallVector<MachineInstr *, 128> Worklist;
1280 Worklist.push_back(&TopInst);
1281
1282 while (!Worklist.empty()) {
1283 MachineInstr *Inst = Worklist.pop_back_val();
Tom Stellarde0387202014-03-21 15:51:54 +00001284 MachineBasicBlock *MBB = Inst->getParent();
1285 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1286
Matt Arsenault27cc9582014-04-18 01:53:18 +00001287 unsigned Opcode = Inst->getOpcode();
Tom Stellard0c354f22014-04-30 15:31:29 +00001288 unsigned NewOpcode = getVALUOp(*Inst);
Matt Arsenault27cc9582014-04-18 01:53:18 +00001289
Tom Stellarde0387202014-03-21 15:51:54 +00001290 // Handle some special cases
Matt Arsenault27cc9582014-04-18 01:53:18 +00001291 switch (Opcode) {
Tom Stellard0c354f22014-04-30 15:31:29 +00001292 default:
1293 if (isSMRD(Inst->getOpcode())) {
1294 moveSMRDToVALU(Inst, MRI);
1295 }
1296 break;
Matt Arsenaultbd995802014-03-24 18:26:52 +00001297 case AMDGPU::S_MOV_B64: {
1298 DebugLoc DL = Inst->getDebugLoc();
Tom Stellarde0387202014-03-21 15:51:54 +00001299
Matt Arsenaultbd995802014-03-24 18:26:52 +00001300 // If the source operand is a register we can replace this with a
1301 // copy.
1302 if (Inst->getOperand(1).isReg()) {
1303 MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
1304 .addOperand(Inst->getOperand(0))
1305 .addOperand(Inst->getOperand(1));
1306 Worklist.push_back(Copy);
1307 } else {
1308 // Otherwise, we need to split this into two movs, because there is
1309 // no 64-bit VALU move instruction.
1310 unsigned Reg = Inst->getOperand(0).getReg();
1311 unsigned Dst = split64BitImm(Worklist,
1312 Inst,
1313 MRI,
1314 MRI.getRegClass(Reg),
1315 Inst->getOperand(1));
1316 MRI.replaceRegWith(Reg, Dst);
Tom Stellarde0387202014-03-21 15:51:54 +00001317 }
Matt Arsenaultbd995802014-03-24 18:26:52 +00001318 Inst->eraseFromParent();
1319 continue;
1320 }
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001321 case AMDGPU::S_AND_B64:
Matt Arsenault689f3252014-06-09 16:36:31 +00001322 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001323 Inst->eraseFromParent();
1324 continue;
1325
1326 case AMDGPU::S_OR_B64:
Matt Arsenault689f3252014-06-09 16:36:31 +00001327 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001328 Inst->eraseFromParent();
1329 continue;
1330
1331 case AMDGPU::S_XOR_B64:
Matt Arsenault689f3252014-06-09 16:36:31 +00001332 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001333 Inst->eraseFromParent();
1334 continue;
1335
1336 case AMDGPU::S_NOT_B64:
Matt Arsenault689f3252014-06-09 16:36:31 +00001337 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001338 Inst->eraseFromParent();
1339 continue;
1340
Matt Arsenault8333e432014-06-10 19:18:24 +00001341 case AMDGPU::S_BCNT1_I32_B64:
1342 splitScalar64BitBCNT(Worklist, Inst);
1343 Inst->eraseFromParent();
1344 continue;
1345
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001346 case AMDGPU::S_BFE_U64:
1347 case AMDGPU::S_BFE_I64:
1348 case AMDGPU::S_BFM_B64:
1349 llvm_unreachable("Moving this op to VALU not implemented");
Tom Stellarde0387202014-03-21 15:51:54 +00001350 }
1351
Tom Stellard15834092014-03-21 15:51:57 +00001352 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
1353 // We cannot move this instruction to the VALU, so we should try to
1354 // legalize its operands instead.
1355 legalizeOperands(Inst);
Tom Stellard82166022013-11-13 23:36:37 +00001356 continue;
Tom Stellard15834092014-03-21 15:51:57 +00001357 }
Tom Stellard82166022013-11-13 23:36:37 +00001358
Tom Stellard82166022013-11-13 23:36:37 +00001359 // Use the new VALU Opcode.
1360 const MCInstrDesc &NewDesc = get(NewOpcode);
1361 Inst->setDesc(NewDesc);
1362
Matt Arsenaultf0b1e3a2013-11-18 20:09:21 +00001363 // Remove any references to SCC. Vector instructions can't read from it, and
1364 // we're just about to add the implicit use/defs of VCC, so we don't want
1365 // both.
1366 for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
1367 MachineOperand &Op = Inst->getOperand(i);
1368 if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
1369 Inst->RemoveOperand(i);
1370 }
1371
Matt Arsenault27cc9582014-04-18 01:53:18 +00001372 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
1373 // We are converting these to a BFE, so we need to add the missing
1374 // operands for the size and offset.
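      // For example, S_SEXT_I32_I8 becomes a signed bitfield extract of the low
      // 8 bits, i.e. roughly dst = bfe_i32(src, 0 /*offset*/, 8 /*width*/).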
1375 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
Vincent Lejeune94af31f2014-05-10 19:18:33 +00001376 Inst->addOperand(Inst->getOperand(1));
1377 Inst->getOperand(1).ChangeToImmediate(0);
1378 Inst->addOperand(MachineOperand::CreateImm(0));
1379 Inst->addOperand(MachineOperand::CreateImm(0));
Matt Arsenault27cc9582014-04-18 01:53:18 +00001380 Inst->addOperand(MachineOperand::CreateImm(0));
1381 Inst->addOperand(MachineOperand::CreateImm(Size));
1382
1383 // XXX - Other pointless operands. There are 4, but it seems you only need
1384 // 3 to not hit an assertion later in MCInstLower.
1385 Inst->addOperand(MachineOperand::CreateImm(0));
1386 Inst->addOperand(MachineOperand::CreateImm(0));
Matt Arsenaultb5b51102014-06-10 19:18:21 +00001387 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
1388 // The VALU version adds the second operand to the result, so insert an
1389 // extra 0 operand.
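      // V_BCNT_U32_B32 computes dst = countbits(src0) + src1, so a zero src1
      // preserves the scalar semantics.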
1390 Inst->addOperand(MachineOperand::CreateImm(0));
Tom Stellard82166022013-11-13 23:36:37 +00001391 }
1392
Matt Arsenault27cc9582014-04-18 01:53:18 +00001393 addDescImplicitUseDef(NewDesc, Inst);
Tom Stellard82166022013-11-13 23:36:37 +00001394
Matt Arsenault78b86702014-04-18 05:19:26 +00001395 if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
1396 const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
1397 // If we need to move this to VGPRs, we need to unpack the second operand
1398 // back into the 2 separate ones for bit offset and width.
1399 assert(OffsetWidthOp.isImm() &&
1400 "Scalar BFE is only implemented for constant width and offset");
1401 uint32_t Imm = OffsetWidthOp.getImm();
1402
1403 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
1404 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
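      // Worked example with a hypothetical immediate: Imm = 0x00100008 yields
      // Offset = 8 and BitWidth = 16, i.e. extract 16 bits starting at bit 8.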
1405
1406 Inst->RemoveOperand(2); // Remove old immediate.
Vincent Lejeune94af31f2014-05-10 19:18:33 +00001407 Inst->addOperand(Inst->getOperand(1));
1408 Inst->getOperand(1).ChangeToImmediate(0);
Matt Arsenault4b0402e2014-05-13 23:45:50 +00001409 Inst->addOperand(MachineOperand::CreateImm(0));
Matt Arsenault78b86702014-04-18 05:19:26 +00001410 Inst->addOperand(MachineOperand::CreateImm(Offset));
Matt Arsenault78b86702014-04-18 05:19:26 +00001411 Inst->addOperand(MachineOperand::CreateImm(0));
Vincent Lejeune94af31f2014-05-10 19:18:33 +00001412 Inst->addOperand(MachineOperand::CreateImm(BitWidth));
Matt Arsenault78b86702014-04-18 05:19:26 +00001413 Inst->addOperand(MachineOperand::CreateImm(0));
1414 Inst->addOperand(MachineOperand::CreateImm(0));
Matt Arsenault78b86702014-04-18 05:19:26 +00001415 }
1416
Tom Stellard82166022013-11-13 23:36:37 +00001417 // Update the destination register class.
Tom Stellarde1a24452014-04-17 21:00:01 +00001418
Tom Stellard82166022013-11-13 23:36:37 +00001419 const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
1420
Matt Arsenault27cc9582014-04-18 01:53:18 +00001421 switch (Opcode) {
Tom Stellard82166022013-11-13 23:36:37 +00001422 // For target instructions, getOpRegClass just returns the virtual
1423 // register class associated with the operand, so we need to find an
1424 // equivalent VGPR register class in order to move the instruction to the
1425 // VALU.
1426 case AMDGPU::COPY:
1427 case AMDGPU::PHI:
1428 case AMDGPU::REG_SEQUENCE:
Tom Stellard204e61b2014-04-07 19:45:45 +00001429 case AMDGPU::INSERT_SUBREG:
Tom Stellard82166022013-11-13 23:36:37 +00001430 if (RI.hasVGPRs(NewDstRC))
1431 continue;
1432 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
1433 if (!NewDstRC)
1434 continue;
1435 break;
1436 default:
1437 break;
1438 }
1439
1440 unsigned DstReg = Inst->getOperand(0).getReg();
1441 unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
1442 MRI.replaceRegWith(DstReg, NewDstReg);
1443
Tom Stellarde1a24452014-04-17 21:00:01 +00001444 // Legalize the operands
1445 legalizeOperands(Inst);
1446
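    // Propagate the change: any user of the new VGPR result that cannot accept
    // a VGPR operand (i.e. an instruction still expecting an SGPR there) is
    // queued so it gets moved to the VALU as well.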
Tom Stellard82166022013-11-13 23:36:37 +00001447 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
1448 E = MRI.use_end(); I != E; ++I) {
Owen Anderson16c6bf42014-03-13 23:12:04 +00001449 MachineInstr &UseMI = *I->getParent();
Tom Stellard82166022013-11-13 23:36:37 +00001450 if (!canReadVGPR(UseMI, I.getOperandNo())) {
1451 Worklist.push_back(&UseMI);
1452 }
1453 }
1454 }
1455}
1456
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001457//===----------------------------------------------------------------------===//
1458// Indirect addressing callbacks
1459//===----------------------------------------------------------------------===//
1460
1461unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
1462 unsigned Channel) const {
1463 assert(Channel == 0);
1464 return RegIndex;
1465}
1466
Tom Stellard26a3b672013-10-22 18:19:10 +00001467const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
Tom Stellard81d871d2013-11-13 23:36:50 +00001468 return &AMDGPU::VReg_32RegClass;
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001469}
1470
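// Split a 64-bit scalar unary operation (e.g. S_NOT_B64) into two 32-bit
// operations on the sub0 and sub1 halves of the source, recombining the
// results with a REG_SEQUENCE. Both halves are pushed onto the worklist so
// moveToVALU can convert and legalize them in turn.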
Matt Arsenault689f3252014-06-09 16:36:31 +00001471void SIInstrInfo::splitScalar64BitUnaryOp(
1472 SmallVectorImpl<MachineInstr *> &Worklist,
1473 MachineInstr *Inst,
1474 unsigned Opcode) const {
1475 MachineBasicBlock &MBB = *Inst->getParent();
1476 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1477
1478 MachineOperand &Dest = Inst->getOperand(0);
1479 MachineOperand &Src0 = Inst->getOperand(1);
1480 DebugLoc DL = Inst->getDebugLoc();
1481
1482 MachineBasicBlock::iterator MII = Inst;
1483
1484 const MCInstrDesc &InstDesc = get(Opcode);
1485 const TargetRegisterClass *Src0RC = Src0.isReg() ?
1486 MRI.getRegClass(Src0.getReg()) :
1487 &AMDGPU::SGPR_32RegClass;
1488
1489 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
1490
1491 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
1492 AMDGPU::sub0, Src0SubRC);
1493
1494 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
1495 const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
1496
1497 unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
1498 MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
1499 .addOperand(SrcReg0Sub0);
1500
1501 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
1502 AMDGPU::sub1, Src0SubRC);
1503
1504 unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
1505 MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
1506 .addOperand(SrcReg0Sub1);
1507
1508 unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
1509 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
1510 .addReg(DestSub0)
1511 .addImm(AMDGPU::sub0)
1512 .addReg(DestSub1)
1513 .addImm(AMDGPU::sub1);
1514
1515 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
1516
1517 // The two halves are still scalar instructions; queue them so they are
1518 // converted to the VALU and their operands legalized.
1519 Worklist.push_back(LoHalf);
1520 Worklist.push_back(HiHalf);
1521}
1522
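// Split a 64-bit scalar binary operation into two 32-bit operations, one per
// register half. As a rough illustration (register names hypothetical):
//   s_xor_b64 s[0:1], s[2:3], s[4:5]
// becomes
//   s_xor_b32 s0, s2, s4   ; sub0 halves
//   s_xor_b32 s1, s3, s5   ; sub1 halves
// followed by a REG_SEQUENCE, with both halves queued for conversion to the
// VALU.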
1523void SIInstrInfo::splitScalar64BitBinaryOp(
1524 SmallVectorImpl<MachineInstr *> &Worklist,
1525 MachineInstr *Inst,
1526 unsigned Opcode) const {
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001527 MachineBasicBlock &MBB = *Inst->getParent();
1528 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1529
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001530 MachineOperand &Dest = Inst->getOperand(0);
1531 MachineOperand &Src0 = Inst->getOperand(1);
1532 MachineOperand &Src1 = Inst->getOperand(2);
1533 DebugLoc DL = Inst->getDebugLoc();
1534
1535 MachineBasicBlock::iterator MII = Inst;
1536
1537 const MCInstrDesc &InstDesc = get(Opcode);
Matt Arsenault684dc802014-03-24 20:08:13 +00001538 const TargetRegisterClass *Src0RC = Src0.isReg() ?
1539 MRI.getRegClass(Src0.getReg()) :
1540 &AMDGPU::SGPR_32RegClass;
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001541
Matt Arsenault684dc802014-03-24 20:08:13 +00001542 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
1543 const TargetRegisterClass *Src1RC = Src1.isReg() ?
1544 MRI.getRegClass(Src1.getReg()) :
1545 &AMDGPU::SGPR_32RegClass;
1546
1547 const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
1548
1549 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
1550 AMDGPU::sub0, Src0SubRC);
1551 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
1552 AMDGPU::sub0, Src1SubRC);
1553
1554 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
1555 const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
1556
1557 unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001558 MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
Matt Arsenault248b7b62014-03-24 20:08:09 +00001559 .addOperand(SrcReg0Sub0)
1560 .addOperand(SrcReg1Sub0);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001561
Matt Arsenault684dc802014-03-24 20:08:13 +00001562 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
1563 AMDGPU::sub1, Src0SubRC);
1564 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
1565 AMDGPU::sub1, Src1SubRC);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001566
Matt Arsenault684dc802014-03-24 20:08:13 +00001567 unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001568 MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
Matt Arsenault248b7b62014-03-24 20:08:09 +00001569 .addOperand(SrcReg0Sub1)
1570 .addOperand(SrcReg1Sub1);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001571
Matt Arsenault684dc802014-03-24 20:08:13 +00001572 unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00001573 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
1574 .addReg(DestSub0)
1575 .addImm(AMDGPU::sub0)
1576 .addReg(DestSub1)
1577 .addImm(AMDGPU::sub1);
1578
1579 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
1580
1581 // Try to legalize the operands in case we need to swap the order to keep it
1582 // valid.
1583 Worklist.push_back(LoHalf);
1584 Worklist.push_back(HiHalf);
1585}
1586
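// Lower a 64-bit scalar population count: countbits(x) == countbits(x.lo) +
// countbits(x.hi), so two chained V_BCNT_U32_B32 instructions are emitted,
// the second using the first result as its add operand.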
Matt Arsenault8333e432014-06-10 19:18:24 +00001587void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
1588 MachineInstr *Inst) const {
1589 MachineBasicBlock &MBB = *Inst->getParent();
1590 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1591
1592 MachineBasicBlock::iterator MII = Inst;
1593 DebugLoc DL = Inst->getDebugLoc();
1594
1595 MachineOperand &Dest = Inst->getOperand(0);
1596 MachineOperand &Src = Inst->getOperand(1);
1597
1598 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32);
1599 const TargetRegisterClass *SrcRC = Src.isReg() ?
1600 MRI.getRegClass(Src.getReg()) :
1601 &AMDGPU::SGPR_32RegClass;
1602
1603 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1604 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1605
1606 const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
1607
1608 MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
1609 AMDGPU::sub0, SrcSubRC);
1610 MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
1611 AMDGPU::sub1, SrcSubRC);
1612
1613 MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
1614 .addOperand(SrcRegSub0)
1615 .addImm(0);
1616
1617 MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
1618 .addOperand(SrcRegSub1)
1619 .addReg(MidReg);
1620
1621 MRI.replaceRegWith(Dest.getReg(), ResultReg);
1622
1623 Worklist.push_back(First);
1624 Worklist.push_back(Second);
1625}
1626
Matt Arsenault27cc9582014-04-18 01:53:18 +00001627void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
1628 MachineInstr *Inst) const {
1629 // Add the implicit register uses and definitions from the new instruction descriptor.
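  // For example, a converted VALU instruction that produces a carry picks up
  // an implicit def of VCC that the original SALU form did not have.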
1630 if (NewDesc.ImplicitUses) {
1631 for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
1632 unsigned Reg = NewDesc.ImplicitUses[i];
1633 Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
1634 }
1635 }
1636
1637 if (NewDesc.ImplicitDefs) {
1638 for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
1639 unsigned Reg = NewDesc.ImplicitDefs[i];
1640 Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
1641 }
1642 }
1643}
1644
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001645MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
1646 MachineBasicBlock *MBB,
1647 MachineBasicBlock::iterator I,
1648 unsigned ValueReg,
1649 unsigned Address, unsigned OffsetReg) const {
Tom Stellard81d871d2013-11-13 23:36:50 +00001650 const DebugLoc &DL = MBB->findDebugLoc(I);
1651 unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
1652 getIndirectIndexBegin(*MBB->getParent()));
1653
1654 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
1655 .addReg(IndirectBaseReg, RegState::Define)
1656 .addOperand(I->getOperand(0))
1657 .addReg(IndirectBaseReg)
1658 .addReg(OffsetReg)
1659 .addImm(0)
1660 .addReg(ValueReg);
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001661}
1662
1663MachineInstrBuilder SIInstrInfo::buildIndirectRead(
1664 MachineBasicBlock *MBB,
1665 MachineBasicBlock::iterator I,
1666 unsigned ValueReg,
1667 unsigned Address, unsigned OffsetReg) const {
Tom Stellard81d871d2013-11-13 23:36:50 +00001668 const DebugLoc &DL = MBB->findDebugLoc(I);
1669 unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
1670 getIndirectIndexBegin(*MBB->getParent()));
1671
1672 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
1673 .addOperand(I->getOperand(0))
1674 .addOperand(I->getOperand(1))
1675 .addReg(IndirectBaseReg)
1676 .addReg(OffsetReg)
1677 .addImm(0);
1678
1679}
1680
1681void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
1682 const MachineFunction &MF) const {
1683 int End = getIndirectIndexEnd(MF);
1684 int Begin = getIndirectIndexBegin(MF);
1685
1686 if (End == -1)
1687 return;
1688
1689
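  // Reserve every register class whose 32-bit subregisters can overlap the
  // indirectly addressed range [Begin, End]; a class spanning N consecutive
  // 32-bit registers can overlap starting up to N - 1 slots before Begin.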
1690 for (int Index = Begin; Index <= End; ++Index)
1691 Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));
1692
Tom Stellard415ef6d2013-11-13 23:58:51 +00001693 for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
Tom Stellard81d871d2013-11-13 23:36:50 +00001694 Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
1695
Tom Stellard415ef6d2013-11-13 23:58:51 +00001696 for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
Tom Stellard81d871d2013-11-13 23:36:50 +00001697 Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));
1698
Tom Stellard415ef6d2013-11-13 23:58:51 +00001699 for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
Tom Stellard81d871d2013-11-13 23:36:50 +00001700 Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));
1701
Tom Stellard415ef6d2013-11-13 23:58:51 +00001702 for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
Tom Stellard81d871d2013-11-13 23:36:50 +00001703 Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));
1704
Tom Stellard415ef6d2013-11-13 23:58:51 +00001705 for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
Tom Stellard81d871d2013-11-13 23:36:50 +00001706 Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00001707}
Tom Stellard1aaad692014-07-21 16:55:33 +00001708
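// Convenience wrapper around getNamedOperandIdx; for example,
// getNamedOperand(MI, AMDGPU::OpName::offset) returns the offset operand of a
// load/store, or nullptr if the opcode has no operand with that name.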
1709const MachineOperand *SIInstrInfo::getNamedOperand(const MachineInstr& MI,
1710 unsigned OperandName) const {
1711 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
1712 if (Idx == -1)
1713 return nullptr;
1714
1715 return &MI.getOperand(Idx);
1716}