blob: 36a60b32d8f751065cd173be81e11c089ef3cbcc [file] [log] [blame]
//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//
15
16#include "AMDGPUInstrInfo.h"
17#include "AMDGPURegisterInfo.h"
18#include "AMDGPUTargetMachine.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22
Chandler Carruthd174b722014-04-22 02:03:14 +000023using namespace llvm;
24
Juergen Ributzkad12ccbd2013-11-19 00:57:56 +000025#define GET_INSTRINFO_CTOR_DTOR
Christian Konigf741fbf2013-02-26 17:52:42 +000026#define GET_INSTRMAP_INFO
Tom Stellard75aadc22012-12-11 21:25:42 +000027#include "AMDGPUGenInstrInfo.inc"
28
// Pin the vtable to this file: defining this otherwise-empty virtual method
// out-of-line gives the class a "key function", so the compiler emits the
// vtable in exactly this translation unit instead of in every user of the
// class.
void AMDGPUInstrInfo::anchor() {}
31
Matt Arsenault43e92fe2016-06-24 06:30:11 +000032AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
Matt Arsenaultb62a4eb2017-08-01 19:54:18 +000033 : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
34 ST(ST),
35 AMDGPUASI(ST.getAMDGPUAS()) {}
Tom Stellard75aadc22012-12-11 21:25:42 +000036
Matt Arsenaultd5f4de22014-08-06 00:29:49 +000037// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
38// the first 16 loads will be interleaved with the stores, and the next 16 will
39// be clustered as expected. It should really split into 2 16 store batches.
40//
41// Loads are clustered until this returns false, rather than trying to schedule
42// groups of stores. This also means we have to deal with saying different
43// address space loads should be clustered, and ones which might cause bank
44// conflicts.
45//
46// This might be deprecated so it might not be worth that much effort to fix.
47bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
48 int64_t Offset0, int64_t Offset1,
49 unsigned NumLoads) const {
50 assert(Offset1 > Offset0 &&
51 "Second offset should be larger than first offset!");
52 // If we have less than 16 loads in a row, and the offsets are within 64
53 // bytes, then schedule together.
54
55 // A cacheline is 64 bytes (for global memory).
56 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
Tom Stellard75aadc22012-12-11 21:25:42 +000057}
58
Tom Stellard682bfbc2013-10-10 17:11:24 +000059int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
60 switch (Channels) {
61 default: return Opcode;
62 case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
63 case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
64 case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
65 }
66}
Tom Stellardc721a232014-05-16 20:56:47 +000067
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
// The numeric values are load-bearing: they are passed (as the Subtarget enum)
// into the tablegen'd getMCOpcodeGen via pseudoToMCOpcode below, so do not
// renumber or reorder without updating the .td side.
enum SIEncodingFamily {
  SI = 0,
  VI = 1,
  SDWA = 2,
  SDWA9 = 3
};
75
Tom Stellardc721a232014-05-16 20:56:47 +000076// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
Matt Arsenault1f0227a2014-10-07 21:29:56 +000077// header files, so we need to wrap it in a function that takes unsigned
Tom Stellardc721a232014-05-16 20:56:47 +000078// instead.
79namespace llvm {
80namespace AMDGPU {
Marek Olsaka93603d2015-01-15 18:42:51 +000081static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
Matt Arsenault43e92fe2016-06-24 06:30:11 +000082 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
Tom Stellardc721a232014-05-16 20:56:47 +000083}
84}
85}
Marek Olsaka93603d2015-01-15 18:42:51 +000086
Matt Arsenault43e92fe2016-06-24 06:30:11 +000087static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
88 switch (ST.getGeneration()) {
89 case AMDGPUSubtarget::SOUTHERN_ISLANDS:
90 case AMDGPUSubtarget::SEA_ISLANDS:
91 return SIEncodingFamily::SI;
Marek Olsaka93603d2015-01-15 18:42:51 +000092 case AMDGPUSubtarget::VOLCANIC_ISLANDS:
Matt Arsenaulte823d922017-02-18 18:29:53 +000093 case AMDGPUSubtarget::GFX9:
Matt Arsenault43e92fe2016-06-24 06:30:11 +000094 return SIEncodingFamily::VI;
95
96 // FIXME: This should never be called for r600 GPUs.
97 case AMDGPUSubtarget::R600:
98 case AMDGPUSubtarget::R700:
99 case AMDGPUSubtarget::EVERGREEN:
100 case AMDGPUSubtarget::NORTHERN_ISLANDS:
101 return SIEncodingFamily::SI;
Marek Olsaka93603d2015-01-15 18:42:51 +0000102 }
Simon Pilgrim634dde32016-06-27 12:58:10 +0000103
104 llvm_unreachable("Unknown subtarget generation!");
Marek Olsaka93603d2015-01-15 18:42:51 +0000105}
106
107int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
Sam Kolton549c89d2017-06-21 08:53:38 +0000108 SIEncodingFamily Gen = subtargetEncodingFamily(ST);
109 if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
110 Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
111 : SIEncodingFamily::SDWA;
112
113 int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
Marek Olsaka93603d2015-01-15 18:42:51 +0000114
115 // -1 means that Opcode is already a native instruction.
116 if (MCOp == -1)
117 return Opcode;
118
119 // (uint16_t)-1 means that Opcode is a pseudo instruction that has
120 // no encoding in the given subtarget generation.
121 if (MCOp == (uint16_t)-1)
122 return -1;
123
124 return MCOp;
125}