blob: cb2064cf19e2fb092da65dee5b69c9bc854567b3 [file] [log] [blame]
//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//
15
16#include "AMDGPUInstrInfo.h"
17#include "AMDGPURegisterInfo.h"
18#include "AMDGPUTargetMachine.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22
Chandler Carruthd174b722014-04-22 02:03:14 +000023using namespace llvm;
24
Juergen Ributzkad12ccbd2013-11-19 00:57:56 +000025#define GET_INSTRINFO_CTOR_DTOR
Tom Stellard75aadc22012-12-11 21:25:42 +000026#include "AMDGPUGenInstrInfo.inc"
27
Nicolai Haehnle5d0d3032018-04-01 17:09:07 +000028namespace llvm {
29namespace AMDGPU {
30#define GET_RSRCINTRINSIC_IMPL
31#include "AMDGPUGenSearchableTables.inc"
32}
33}
34
Juergen Ributzkad12ccbd2013-11-19 00:57:56 +000035// Pin the vtable to this file.
36void AMDGPUInstrInfo::anchor() {}
37
Matt Arsenault43e92fe2016-06-24 06:30:11 +000038AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
Matt Arsenaultb62a4eb2017-08-01 19:54:18 +000039 : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
40 ST(ST),
41 AMDGPUASI(ST.getAMDGPUAS()) {}
Tom Stellard75aadc22012-12-11 21:25:42 +000042
Matt Arsenaultd5f4de22014-08-06 00:29:49 +000043// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
44// the first 16 loads will be interleaved with the stores, and the next 16 will
45// be clustered as expected. It should really split into 2 16 store batches.
46//
47// Loads are clustered until this returns false, rather than trying to schedule
48// groups of stores. This also means we have to deal with saying different
49// address space loads should be clustered, and ones which might cause bank
50// conflicts.
51//
52// This might be deprecated so it might not be worth that much effort to fix.
53bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
54 int64_t Offset0, int64_t Offset1,
55 unsigned NumLoads) const {
56 assert(Offset1 > Offset0 &&
57 "Second offset should be larger than first offset!");
58 // If we have less than 16 loads in a row, and the offsets are within 64
59 // bytes, then schedule together.
60
61 // A cacheline is 64 bytes (for global memory).
62 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
Tom Stellard75aadc22012-12-11 21:25:42 +000063}
64
// Encoding families used to pick the real MC opcode for a pseudo instruction.
//
// NOTE: The numeric values must be kept in sync with the SIEncodingFamily
// class in SIInstrInfo.td.
enum SIEncodingFamily {
  SI = 0,    // Default for Southern Islands / Sea Islands (and the r600 family).
  VI = 1,    // Default for Volcanic Islands and GFX9.
  SDWA = 2,  // SDWA instructions on subtargets other than GFX9.
  SDWA9 = 3, // SDWA instructions on GFX9.
  GFX80 = 4, // Non-MIMG D16 instructions when the subtarget has unpacked D16 VMEM.
  GFX9 = 5   // Instructions flagged renamedInGFX9, on GFX9 and later.
};
74
Matt Arsenault43e92fe2016-06-24 06:30:11 +000075static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
76 switch (ST.getGeneration()) {
77 case AMDGPUSubtarget::SOUTHERN_ISLANDS:
78 case AMDGPUSubtarget::SEA_ISLANDS:
79 return SIEncodingFamily::SI;
Marek Olsaka93603d2015-01-15 18:42:51 +000080 case AMDGPUSubtarget::VOLCANIC_ISLANDS:
Matt Arsenaulte823d922017-02-18 18:29:53 +000081 case AMDGPUSubtarget::GFX9:
Matt Arsenault43e92fe2016-06-24 06:30:11 +000082 return SIEncodingFamily::VI;
83
84 // FIXME: This should never be called for r600 GPUs.
85 case AMDGPUSubtarget::R600:
86 case AMDGPUSubtarget::R700:
87 case AMDGPUSubtarget::EVERGREEN:
88 case AMDGPUSubtarget::NORTHERN_ISLANDS:
89 return SIEncodingFamily::SI;
Marek Olsaka93603d2015-01-15 18:42:51 +000090 }
Simon Pilgrim634dde32016-06-27 12:58:10 +000091
92 llvm_unreachable("Unknown subtarget generation!");
Marek Olsaka93603d2015-01-15 18:42:51 +000093}
94
95int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
Sam Kolton549c89d2017-06-21 08:53:38 +000096 SIEncodingFamily Gen = subtargetEncodingFamily(ST);
Dmitry Preobrazhenskya0342dc2017-11-20 18:24:21 +000097
98 if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
99 ST.getGeneration() >= AMDGPUSubtarget::GFX9)
100 Gen = SIEncodingFamily::GFX9;
101
Sam Kolton549c89d2017-06-21 08:53:38 +0000102 if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
103 Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
104 : SIEncodingFamily::SDWA;
Changpeng Fang29fcf882018-02-01 18:41:33 +0000105 // Adjust the encoding family to GFX80 for D16 buffer instructions when the
106 // subtarget has UnpackedD16VMem feature.
107 // TODO: remove this when we discard GFX80 encoding.
108 if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
109 && !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
110 Gen = SIEncodingFamily::GFX80;
Sam Kolton549c89d2017-06-21 08:53:38 +0000111
112 int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
Marek Olsaka93603d2015-01-15 18:42:51 +0000113
114 // -1 means that Opcode is already a native instruction.
115 if (MCOp == -1)
116 return Opcode;
117
118 // (uint16_t)-1 means that Opcode is a pseudo instruction that has
119 // no encoding in the given subtarget generation.
120 if (MCOp == (uint16_t)-1)
121 return -1;
122
123 return MCOp;
124}
Matt Arsenaultbcf7bec2018-02-09 16:57:48 +0000125
126// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
127bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
128 const Value *Ptr = MMO->getValue();
129 // UndefValue means this is a load of a kernel input. These are uniform.
130 // Sometimes LDS instructions have constant pointers.
131 // If Ptr is null, then that means this mem operand contains a
132 // PseudoSourceValue like GOT.
133 if (!Ptr || isa<UndefValue>(Ptr) ||
134 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
135 return true;
136
Matt Arsenault923712b2018-02-09 16:57:57 +0000137 if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
138 return true;
139
Matt Arsenaultbcf7bec2018-02-09 16:57:48 +0000140 if (const Argument *Arg = dyn_cast<Argument>(Ptr))
141 return AMDGPU::isArgPassedInSGPR(Arg);
142
143 const Instruction *I = dyn_cast<Instruction>(Ptr);
144 return I && I->getMetadata("amdgpu.uniform");
145}