//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//
15
16#include "AMDGPUInstrInfo.h"
17#include "AMDGPURegisterInfo.h"
18#include "AMDGPUTargetMachine.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22
Chandler Carruthd174b722014-04-22 02:03:14 +000023using namespace llvm;
24
Juergen Ributzkad12ccbd2013-11-19 00:57:56 +000025#define GET_INSTRINFO_CTOR_DTOR
Tom Stellard75aadc22012-12-11 21:25:42 +000026#include "AMDGPUGenInstrInfo.inc"
27
Nicolai Haehnle5d0d3032018-04-01 17:09:07 +000028namespace llvm {
29namespace AMDGPU {
30#define GET_RSRCINTRINSIC_IMPL
31#include "AMDGPUGenSearchableTables.inc"
Nicolai Haehnle2f5a7382018-04-04 10:58:54 +000032
33#define GET_D16IMAGEDIMINTRINSIC_IMPL
34#include "AMDGPUGenSearchableTables.inc"
Nicolai Haehnle5d0d3032018-04-01 17:09:07 +000035}
36}
37
Juergen Ributzkad12ccbd2013-11-19 00:57:56 +000038// Pin the vtable to this file.
39void AMDGPUInstrInfo::anchor() {}
40
Matt Arsenault43e92fe2016-06-24 06:30:11 +000041AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
Matt Arsenaultb62a4eb2017-08-01 19:54:18 +000042 : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
43 ST(ST),
44 AMDGPUASI(ST.getAMDGPUAS()) {}
Tom Stellard75aadc22012-12-11 21:25:42 +000045
Matt Arsenaultd5f4de22014-08-06 00:29:49 +000046// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
47// the first 16 loads will be interleaved with the stores, and the next 16 will
48// be clustered as expected. It should really split into 2 16 store batches.
49//
50// Loads are clustered until this returns false, rather than trying to schedule
51// groups of stores. This also means we have to deal with saying different
52// address space loads should be clustered, and ones which might cause bank
53// conflicts.
54//
55// This might be deprecated so it might not be worth that much effort to fix.
56bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
57 int64_t Offset0, int64_t Offset1,
58 unsigned NumLoads) const {
59 assert(Offset1 > Offset0 &&
60 "Second offset should be larger than first offset!");
61 // If we have less than 16 loads in a row, and the offsets are within 64
62 // bytes, then schedule together.
63
64 // A cacheline is 64 bytes (for global memory).
65 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
Tom Stellard75aadc22012-12-11 21:25:42 +000066}
67
// Encoding families handed to AMDGPU::getMCOpcode() when mapping a pseudo
// opcode to an MC opcode.
//
// The numeric values are spelled out because they must be kept in sync with
// the SIEncodingFamily class in SIInstrInfo.td.
enum SIEncodingFamily {
  SI = 0,
  VI = 1,
  SDWA = 2,  // SDWA-flagged instructions on subtargets other than GFX9.
  SDWA9 = 3, // SDWA-flagged instructions on GFX9.
  GFX80 = 4, // D16 non-MIMG instructions with unpacked D16 VMem.
  GFX9 = 5   // Instructions flagged as renamed in GFX9.
};
77
Matt Arsenault43e92fe2016-06-24 06:30:11 +000078static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
79 switch (ST.getGeneration()) {
80 case AMDGPUSubtarget::SOUTHERN_ISLANDS:
81 case AMDGPUSubtarget::SEA_ISLANDS:
82 return SIEncodingFamily::SI;
Marek Olsaka93603d2015-01-15 18:42:51 +000083 case AMDGPUSubtarget::VOLCANIC_ISLANDS:
Matt Arsenaulte823d922017-02-18 18:29:53 +000084 case AMDGPUSubtarget::GFX9:
Matt Arsenault43e92fe2016-06-24 06:30:11 +000085 return SIEncodingFamily::VI;
86
87 // FIXME: This should never be called for r600 GPUs.
88 case AMDGPUSubtarget::R600:
89 case AMDGPUSubtarget::R700:
90 case AMDGPUSubtarget::EVERGREEN:
91 case AMDGPUSubtarget::NORTHERN_ISLANDS:
92 return SIEncodingFamily::SI;
Marek Olsaka93603d2015-01-15 18:42:51 +000093 }
Simon Pilgrim634dde32016-06-27 12:58:10 +000094
95 llvm_unreachable("Unknown subtarget generation!");
Marek Olsaka93603d2015-01-15 18:42:51 +000096}
97
98int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
Sam Kolton549c89d2017-06-21 08:53:38 +000099 SIEncodingFamily Gen = subtargetEncodingFamily(ST);
Dmitry Preobrazhenskya0342dc2017-11-20 18:24:21 +0000100
101 if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
102 ST.getGeneration() >= AMDGPUSubtarget::GFX9)
103 Gen = SIEncodingFamily::GFX9;
104
Sam Kolton549c89d2017-06-21 08:53:38 +0000105 if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
106 Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
107 : SIEncodingFamily::SDWA;
Changpeng Fang29fcf882018-02-01 18:41:33 +0000108 // Adjust the encoding family to GFX80 for D16 buffer instructions when the
109 // subtarget has UnpackedD16VMem feature.
110 // TODO: remove this when we discard GFX80 encoding.
111 if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
112 && !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
113 Gen = SIEncodingFamily::GFX80;
Sam Kolton549c89d2017-06-21 08:53:38 +0000114
115 int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
Marek Olsaka93603d2015-01-15 18:42:51 +0000116
117 // -1 means that Opcode is already a native instruction.
118 if (MCOp == -1)
119 return Opcode;
120
121 // (uint16_t)-1 means that Opcode is a pseudo instruction that has
122 // no encoding in the given subtarget generation.
123 if (MCOp == (uint16_t)-1)
124 return -1;
125
126 return MCOp;
127}
Matt Arsenaultbcf7bec2018-02-09 16:57:48 +0000128
129// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
130bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
131 const Value *Ptr = MMO->getValue();
132 // UndefValue means this is a load of a kernel input. These are uniform.
133 // Sometimes LDS instructions have constant pointers.
134 // If Ptr is null, then that means this mem operand contains a
135 // PseudoSourceValue like GOT.
136 if (!Ptr || isa<UndefValue>(Ptr) ||
137 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
138 return true;
139
Matt Arsenault923712b2018-02-09 16:57:57 +0000140 if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
141 return true;
142
Matt Arsenaultbcf7bec2018-02-09 16:57:48 +0000143 if (const Argument *Arg = dyn_cast<Argument>(Ptr))
144 return AMDGPU::isArgPassedInSGPR(Arg);
145
146 const Instruction *I = dyn_cast<Instruction>(Ptr);
147 return I && I->getMetadata("amdgpu.uniform");
148}