//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {
namespace AMDGPU {
#define GET_RSRCINTRINSIC_IMPL
#include "AMDGPUGenSearchableTables.inc"

#define GET_D16IMAGEDIMINTRINSIC_IMPL
#include "AMDGPUGenSearchableTables.inc"
} // namespace AMDGPU
} // namespace llvm

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}

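// Pass the call frame setup (ADJCALLSTACKUP) and destroy (ADJCALLSTACKDOWN)
// pseudo-opcodes to the generated TargetInstrInfo base class so it can
// recognize call stack adjustments.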
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
    ST(ST),
    AMDGPUASI(ST.getAMDGPUAS()) {}

// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split into two batches of 16
// stores.
//
// Loads are clustered until this returns false, rather than trying to schedule
// groups of stores. This also means we have to deal with deciding whether
// loads from different address spaces should be clustered, and with loads
// which might cause bank conflicts.
//
// This might be deprecated, so it might not be worth that much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have fewer than 16 loads in a row, and the offsets are within 64
  // bytes, then schedule together.

  // A cacheline is 64 bytes (for global memory).
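  // E.g. two loads at offsets 0 and 48 fall within one cacheline and are
  // clustered; loads at offsets 0 and 128 are not.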
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
  SI = 0,
  VI = 1,
  SDWA = 2,
  SDWA9 = 3,
  GFX80 = 4,
  GFX9 = 5
};

static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
  switch (ST.getGeneration()) {
  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
    return SIEncodingFamily::SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
  case AMDGPUSubtarget::GFX9:
    return SIEncodingFamily::VI;

  // FIXME: This should never be called for r600 GPUs.
  case AMDGPUSubtarget::R600:
  case AMDGPUSubtarget::R700:
  case AMDGPUSubtarget::EVERGREEN:
  case AMDGPUSubtarget::NORTHERN_ISLANDS:
    return SIEncodingFamily::SI;
  }

  llvm_unreachable("Unknown subtarget generation!");
}

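// Map a pseudo opcode to the real MC opcode for this subtarget's encoding
// family. Returns the opcode unchanged if it is already a native instruction,
// or -1 if the pseudo has no encoding on this generation, in which case the
// caller must handle the failure.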
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  SIEncodingFamily Gen = subtargetEncodingFamily(ST);

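  // Opcodes renamed in GFX9 need the dedicated GFX9 encoding table.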
  if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
      ST.getGeneration() >= AMDGPUSubtarget::GFX9)
    Gen = SIEncodingFamily::GFX9;

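  // SDWA instructions have their own encoding families; GFX9 uses a distinct
  // SDWA9 encoding.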
  if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
    Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
                                                      : SIEncodingFamily::SDWA;
  // Adjust the encoding family to GFX80 for D16 buffer instructions when the
  // subtarget has the UnpackedD16VMem feature.
  // TODO: remove this when we discard GFX80 encoding.
  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
      && !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
    Gen = SIEncodingFamily::GFX80;

  int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

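  // Loads from the 32-bit constant address space are always uniform.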
  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

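  // Kernel arguments are uniform when they are passed in SGPRs.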
  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
    return AMDGPU::isArgPassedInSGPR(Arg);

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}