//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {
namespace AMDGPU {
#define GET_RSRCINTRINSIC_IMPL
#include "AMDGPUGenSearchableTables.inc"

#define GET_D16IMAGEDIMINTRINSIC_IMPL
#include "AMDGPUGenSearchableTables.inc"
} // namespace AMDGPU
} // namespace llvm

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}

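// Pass the call frame setup (ADJCALLSTACKUP) and destroy (ADJCALLSTACKDOWN)
// pseudo-opcodes to the generated TargetInstrInfo base class so it can
// recognize call stack adjustments.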
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
    ST(ST),
    AMDGPUASI(ST.getAMDGPUAS()) {}

// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split into two batches of 16
// stores.
//
// Loads are clustered until this returns false, rather than trying to schedule
// groups of stores. This also means we have to deal with deciding whether
// loads from different address spaces should be clustered, and with loads
// which might cause bank conflicts.
//
// This might be deprecated, so it might not be worth that much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have fewer than 16 loads in a row, and the offsets are within 64
  // bytes, then schedule together.

  // A cacheline is 64 bytes (for global memory).
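  // E.g. two loads at offsets 0 and 48 fall within one cacheline and are
  // clustered; loads at offsets 0 and 128 are not.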
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
  SI = 0,
  VI = 1,
  SDWA = 2,
  SDWA9 = 3,
  GFX80 = 4,
  GFX9 = 5
};

static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
  switch (ST.getGeneration()) {
  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
    return SIEncodingFamily::SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
  case AMDGPUSubtarget::GFX9:
    return SIEncodingFamily::VI;

  // FIXME: This should never be called for r600 GPUs.
  case AMDGPUSubtarget::R600:
  case AMDGPUSubtarget::R700:
  case AMDGPUSubtarget::EVERGREEN:
  case AMDGPUSubtarget::NORTHERN_ISLANDS:
    return SIEncodingFamily::SI;
  }

  llvm_unreachable("Unknown subtarget generation!");
}

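// Map a pseudo opcode to the real MC opcode for this subtarget's encoding
// family. Returns the opcode unchanged if it is already a native instruction,
// or -1 if the pseudo has no encoding on this generation, in which case the
// caller must handle the failure.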
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  SIEncodingFamily Gen = subtargetEncodingFamily(ST);

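  // Opcodes renamed in GFX9 need the dedicated GFX9 encoding table.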
  if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
      ST.getGeneration() >= AMDGPUSubtarget::GFX9)
    Gen = SIEncodingFamily::GFX9;

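  // SDWA instructions have their own encoding families; GFX9 uses a distinct
  // SDWA9 encoding.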
  if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
    Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
                                                      : SIEncodingFamily::SDWA;
  // Adjust the encoding family to GFX80 for D16 buffer instructions when the
  // subtarget has the UnpackedD16VMem feature.
  // TODO: remove this when we discard GFX80 encoding.
  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
      && !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
    Gen = SIEncodingFamily::GFX80;

  int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

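  // Loads from the 32-bit constant address space are always uniform.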
  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

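  // Kernel arguments are uniform when they are passed in SGPRs.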
  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
    return AMDGPU::isArgPassedInSGPR(Arg);

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}