//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}

AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
    : AMDGPUGenInstrInfo(-1, -1), ST(st) {}

const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
  return RI;
}

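// Opt in to the scheduler's load-clustering heuristics for this target.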
bool AMDGPUInstrInfo::enableClusterLoads() const {
  return true;
}

// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split them into two batches of
// 16.
//
// Loads are clustered until this returns false, rather than trying to
// schedule groups of stores. This also means we have to decide whether loads
// from different address spaces should be clustered, and whether loads that
// might cause bank conflicts should be.
//
// This might be deprecated, so it might not be worth much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have fewer than 16 loads in a row, and the offsets are within 64
  // bytes, then schedule together.
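  // For example, two loads at offsets 0 and 48 are clustered as long as no
  // more than 16 loads have been grouped so far, while loads at offsets 0 and
  // 96 (more than a cacheline apart) are not.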

  // A cacheline is 64 bytes (for global memory).
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  int Offset = -1;

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  if (MRI.livein_empty()) {
    return 0;
  }

  const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
                                            LE = MRI.livein_end();
       LI != LE; ++LI) {
    unsigned Reg = LI->first;
    if (TargetRegisterInfo::isVirtualRegister(Reg) ||
        !IndirectRC->contains(Reg))
      continue;

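    // Find this live-in register's index within the indirect register class;
    // the highest index seen (plus one) is where indirect indexing begins.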
    unsigned RegIndex;
    unsigned RegEnd;
    for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
         ++RegIndex) {
      if (IndirectRC->getRegister(RegIndex) == Reg)
        break;
    }
    Offset = std::max(Offset, (int)RegIndex);
  }

  return Offset + 1;
}

int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
  int Offset = 0;
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // Variable-sized objects are not supported.
  assert(!MFI->hasVarSizedObjects());

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

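  // Ask the target's frame lowering for the offset of frame index -1 from the
  // frame base; the frame register needed to reach it is not used here.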
  unsigned IgnoredFrameReg;
  Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexReference(
      MF, -1, IgnoredFrameReg);

  return getIndirectIndexBegin(MF) + Offset;
}

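// Map an MIMG opcode to the equivalent variant that writes only the requested
// number of channels; unhandled channel counts return the opcode unchanged.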
int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
  switch (Channels) {
  default: return Opcode;
  case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
  case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
  case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
  }
}

// Wrapper for the TableGen'd function. enum Subtarget is not defined in any
// header file, so we need to wrap it in a function that takes unsigned
// instead.
namespace llvm {
namespace AMDGPU {
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, (enum Subtarget)Gen);
}
}
}

// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
enum SISubtarget {
  SI = 0,
  VI = 1
};

static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
  switch (Gen) {
  default:
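    // All other generations use the SI encoding.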
    return SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
    return VI;
  }
}

int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  int MCOp = AMDGPU::getMCOpcode(
      Opcode, AMDGPUSubtargetToSISubtarget(ST.getGeneration()));

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
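  // (The mapping table presumably stores opcodes as uint16_t, so an entry set
  // to -1 in the .td files comes back as 0xFFFF rather than a signed -1.)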
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}