Blame - llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp - toolchain/llvm-project

blob: 2643cb05742c1f7950498c8eff5ddd60c50c8d1f [file] [log] [blame]

Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	1	//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
				2	//
Chandler Carruth	2946cd7	2019-01-19 08:50:56 +0000	[diff] [blame]	3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	6	//
				7	//===----------------------------------------------------------------------===//
				8	//
				9	/// \file
Adrian Prantl	5f8f34e4	2018-05-01 15:54:18 +0000	[diff] [blame]	10	/// Implements the AMDGPU specific subclass of TargetSubtarget.
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	11	//
				12	//===----------------------------------------------------------------------===//
				13
				14	#include "AMDGPUSubtarget.h"
Quentin Colombet	f3f7d4d	2017-07-05 18:40:56 +0000	[diff] [blame]	15	#include "AMDGPU.h"
				16	#include "AMDGPUTargetMachine.h"
Quentin Colombet	f3f7d4d	2017-07-05 18:40:56 +0000	[diff] [blame]	17	#include "AMDGPUCallLowering.h"
				18	#include "AMDGPUInstructionSelector.h"
				19	#include "AMDGPULegalizerInfo.h"
				20	#include "AMDGPURegisterBankInfo.h"
Konstantin Zhuravlyov	e03b1d7	2017-02-08 13:02:33 +0000	[diff] [blame]	21	#include "SIMachineFunctionInfo.h"
Tom Stellard	44b30b4	2018-05-22 02:03:23 +0000	[diff] [blame]	22	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Matt Arsenault	d9a23ab	2014-07-13 02:08:26 +0000	[diff] [blame]	23	#include "llvm/ADT/SmallString.h"
Tom Stellard	83f0bce	2015-01-29 16:55:25 +0000	[diff] [blame]	24	#include "llvm/CodeGen/MachineScheduler.h"
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	25	#include "llvm/MC/MCSubtargetInfo.h"
Stanislav Mekhanoshin	c90347d	2017-04-12 20:48:56 +0000	[diff] [blame]	26	#include "llvm/IR/MDBuilder.h"
David Blaikie	1be62f0	2017-11-03 22:32:11 +0000	[diff] [blame]	27	#include "llvm/CodeGen/TargetFrameLowering.h"
Eugene Zelenko	6a9226d	2016-12-12 22:23:53 +0000	[diff] [blame]	28	#include <algorithm>
Matt Arsenault	d9a23ab	2014-07-13 02:08:26 +0000	[diff] [blame]	29
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	30	using namespace llvm;
				31
Chandler Carruth	e96dd89	2014-04-21 22:55:11 +0000	[diff] [blame]	32	#define DEBUG_TYPE "amdgpu-subtarget"
				33
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	34	#define GET_SUBTARGETINFO_TARGET_DESC
				35	#define GET_SUBTARGETINFO_CTOR
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	36	#define AMDGPUSubtarget GCNSubtarget
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	37	#include "AMDGPUGenSubtargetInfo.inc"
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	38	#define GET_SUBTARGETINFO_TARGET_DESC
				39	#define GET_SUBTARGETINFO_CTOR
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	40	#undef AMDGPUSubtarget
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	41	#include "R600GenSubtargetInfo.inc"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	42
// Out-of-line, defaulted destructor.
// NOTE(review): presumably defined in this file so that the full types of the
// subtarget's owned members are visible where they are destroyed - confirm.
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	43	GCNSubtarget::~GCNSubtarget() = default;
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	44
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	45	R600Subtarget &
				46	R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
				47	StringRef GPU, StringRef FS) {
Matt Arsenault	055e4dc	2019-03-29 19:14:54 +0000	[diff] [blame]	48	SmallString<256> FullFS("+promote-alloca,");
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	49	FullFS += FS;
				50	ParseSubtargetFeatures(GPU, FullFS);
				51
				52	// FIXME: I don't think think Evergreen has any useful support for
				53	// denormals, but should be checked. Should we issue a warning somewhere
				54	// if someone tries to enable these?
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	55	if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	56	FP32Denormals = false;
				57	}
				58
				59	HasMulU24 = getGeneration() >= EVERGREEN;
				60	HasMulI24 = hasCaymanISA();
				61
				62	return *this;
				63	}
				64
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	65	GCNSubtarget &
				66	GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
Matt Arsenault	f426ddb	2019-04-03 01:58:57 +0000	[diff] [blame]	67	StringRef GPU, StringRef FS) {
Eric Christopher	ac4b69e	2014-07-25 22:22:39 +0000	[diff] [blame]	68	// Determine default and user-specified characteristics
Matt Arsenault	f171cf2	2014-07-14 23:40:49 +0000	[diff] [blame]	69	// On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
				70	// enabled, but some instructions do not respect them and they run at the
				71	// double precision rate, so don't enable by default.
				72	//
				73	// We want to be able to turn these off, but making this a subtarget feature
				74	// for SI has the unhelpful behavior that it unsets everything else if you
				75	// disable it.
David Stuttard	f77079f	2019-01-14 11:55:24 +0000	[diff] [blame]	76	//
				77	// Similarly we want enable-prt-strict-null to be on by default and not to
				78	// unset everything else if it is disabled
Matt Arsenault	d9a23ab	2014-07-13 02:08:26 +0000	[diff] [blame]	79
Matt Arsenault	f426ddb	2019-04-03 01:58:57 +0000	[diff] [blame]	80	// Assuming ECC is enabled is the conservative default.
				81	SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,");
Jan Vesely	d1c9b61	2017-12-04 22:57:29 +0000	[diff] [blame]	82
Changpeng Fang	b41574a	2015-12-22 20:55:23 +0000	[diff] [blame]	83	if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
Matt Arsenault	e0c1f9e	2019-03-17 21:31:35 +0000	[diff] [blame]	84	FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
Matt Arsenault	a6867fd	2017-01-23 22:31:03 +0000	[diff] [blame]	85
Jan Vesely	d1c9b61	2017-12-04 22:57:29 +0000	[diff] [blame]	86	// FIXME: I don't think think Evergreen has any useful support for
				87	// denormals, but should be checked. Should we issue a warning somewhere
				88	// if someone tries to enable these?
				89	if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
				90	FullFS += "+fp64-fp16-denormals,";
				91	} else {
				92	FullFS += "-fp32-denormals,";
				93	}
				94
David Stuttard	f77079f	2019-01-14 11:55:24 +0000	[diff] [blame]	95	FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
				96
Matt Arsenault	d9a23ab	2014-07-13 02:08:26 +0000	[diff] [blame]	97	FullFS += FS;
				98
				99	ParseSubtargetFeatures(GPU, FullFS);
Tom Stellard	2e59a45	2014-06-13 01:32:00 +0000	[diff] [blame]	100
Jan Vesely	d1c9b61	2017-12-04 22:57:29 +0000	[diff] [blame]	101	// We don't support FP64 for EG/NI atm.
				102	assert(!hasFP64() \|\| (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));
				103
Matt Arsenault	d8f7ea3	2017-01-27 17:42:26 +0000	[diff] [blame]	104	// Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
				105	// on VI and newer hardware to avoid assertion failures due to missing ADDR64
				106	// variants of MUBUF instructions.
				107	if (!hasAddr64() && !FS.contains("flat-for-global")) {
				108	FlatForGlobal = true;
				109	}
				110
Matt Arsenault	24ee078	2016-02-12 02:40:47 +0000	[diff] [blame]	111	// Set defaults if needed.
				112	if (MaxPrivateElementSize == 0)
Matt Arsenault	e8ed8e5	2016-05-11 00:28:54 +0000	[diff] [blame]	113	MaxPrivateElementSize = 4;
Matt Arsenault	24ee078	2016-02-12 02:40:47 +0000	[diff] [blame]	114
Matt Arsenault	8728c5f	2017-08-07 14:58:04 +0000	[diff] [blame]	115	if (LDSBankCount == 0)
				116	LDSBankCount = 32;
				117
				118	if (TT.getArch() == Triple::amdgcn) {
				119	if (LocalMemorySize == 0)
				120	LocalMemorySize = 32768;
				121
				122	// Do something sensible for unspecified target.
				123	if (!HasMovrel && !HasVGPRIndexMode)
				124	HasMovrel = true;
				125	}
				126
Matt Arsenault	d704727	2019-02-08 19:18:01 +0000	[diff] [blame]	127	// Don't crash on invalid devices.
				128	if (WavefrontSize == 0)
				129	WavefrontSize = 64;
				130
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	131	HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
				132
Matt Arsenault	f426ddb	2019-04-03 01:58:57 +0000	[diff] [blame]	133	// ECC is on by default, but turn it off if the hardware doesn't support it
				134	// anyway. This matters for the gfx9 targets with d16 loads, but don't support
				135	// ECC.
				136	if (DoesNotSupportSRAMECC && EnableSRAMECC) {
				137	ToggleFeature(AMDGPU::FeatureSRAMECC);
				138	EnableSRAMECC = false;
				139	}
				140
Eric Christopher	ac4b69e	2014-07-25 22:22:39 +0000	[diff] [blame]	141	return *this;
				142	}
				143
Konstantin Zhuravlyov	71e43ee	2018-09-12 18:50:47 +0000	[diff] [blame]	144	AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	145	TargetTriple(TT),
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	146	Has16BitInsts(false),
				147	HasMadMixInsts(false),
				148	FP32Denormals(false),
				149	FPExceptions(false),
				150	HasSDWA(false),
				151	HasVOP3PInsts(false),
				152	HasMulI24(true),
				153	HasMulU24(true),
Matt Arsenault	6c7ba82	2018-08-15 21:03:55 +0000	[diff] [blame]	154	HasInv2PiInlineImm(false),
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	155	HasFminFmaxLegacy(true),
				156	EnablePromoteAlloca(false),
David Stuttard	20de3e9	2018-09-14 10:27:19 +0000	[diff] [blame]	157	HasTrigReducedRange(false),
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	158	LocalMemorySize(0),
				159	WavefrontSize(0)
				160	{ }
				161
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	162	GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
Konstantin Zhuravlyov	71e43ee	2018-09-12 18:50:47 +0000	[diff] [blame]	163	const GCNTargetMachine &TM) :
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	164	AMDGPUGenSubtargetInfo(TT, GPU, FS),
Konstantin Zhuravlyov	71e43ee	2018-09-12 18:50:47 +0000	[diff] [blame]	165	AMDGPUSubtarget(TT),
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	166	TargetTriple(TT),
Matt Arsenault	e0c1f9e	2019-03-17 21:31:35 +0000	[diff] [blame]	167	Gen(TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS),
Stanislav Mekhanoshin	06d3b41	2018-09-17 16:04:32 +0000	[diff] [blame]	168	InstrItins(getInstrItineraryForCPU(GPU)),
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	169	LDSBankCount(0),
				170	MaxPrivateElementSize(0),
Tom Stellard	40ce8af	2015-01-28 16:04:26 +0000	[diff] [blame]	171
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	172	FastFMAF32(false),
				173	HalfRate64Ops(false),
				174
Matt Arsenault	a6867fd	2017-01-23 22:31:03 +0000	[diff] [blame]	175	FP64FP16Denormals(false),
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	176	FlatForGlobal(false),
Konstantin Zhuravlyov	be6c0ca	2017-06-02 17:40:26 +0000	[diff] [blame]	177	AutoWaitcntBeforeBarrier(false),
Konstantin Zhuravlyov	eda425e	2017-10-14 15:59:07 +0000	[diff] [blame]	178	CodeObjectV3(false),
Tom Stellard	64a9d08	2016-10-14 18:10:39 +0000	[diff] [blame]	179	UnalignedScratchAccess(false),
Matt Arsenault	7f681ac	2016-07-01 23:03:44 +0000	[diff] [blame]	180	UnalignedBufferAccess(false),
				181
Matt Arsenault	e823d92	2017-02-18 18:29:53 +0000	[diff] [blame]	182	HasApertureRegs(false),
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	183	EnableXNACK(false),
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame^]	184	EnableCuMode(false),
Wei Ding	205bfdb	2017-02-10 02:15:29 +0000	[diff] [blame]	185	TrapHandler(false),
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	186
Matt Arsenault	45b9818	2017-11-15 00:45:43 +0000	[diff] [blame]	187	EnableHugePrivateBuffer(false),
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	188	EnableLoadStoreOpt(false),
				189	EnableUnsafeDSOffsetFolding(false),
				190	EnableSIScheduler(false),
Marek Olsak	a9a58fa	2018-04-10 22:48:23 +0000	[diff] [blame]	191	EnableDS128(false),
David Stuttard	f77079f	2019-01-14 11:55:24 +0000	[diff] [blame]	192	EnablePRTStrictNull(false),
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	193	DumpCode(false),
				194
				195	FP64(false),
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	196	GCN3Encoding(false),
				197	CIInsts(false),
Stanislav Mekhanoshin	7895c03	2019-04-05 18:24:34 +0000	[diff] [blame]	198	GFX8Insts(false),
Matt Arsenault	2021f08	2017-02-18 19:12:26 +0000	[diff] [blame]	199	GFX9Insts(false),
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame^]	200	GFX10Insts(false),
Stanislav Mekhanoshin	7895c03	2019-04-05 18:24:34 +0000	[diff] [blame]	201	GFX7GFX8GFX9Insts(false),
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	202	SGPRInitBug(false),
				203	HasSMemRealTime(false),
Dmitry Preobrazhensky	ff64aa5	2017-08-16 13:51:56 +0000	[diff] [blame]	204	HasIntClamp(false),
Matt Arsenault	0084adc	2018-04-30 19:08:16 +0000	[diff] [blame]	205	HasFmaMixInsts(false),
Matt Arsenault	cc88ce3	2016-10-12 18:00:51 +0000	[diff] [blame]	206	HasMovrel(false),
				207	HasVGPRIndexMode(false),
Matt Arsenault	c88ba36	2016-10-29 04:05:06 +0000	[diff] [blame]	208	HasScalarStores(false),
Dmitry Preobrazhensky	6bad04e	2018-04-02 16:10:25 +0000	[diff] [blame]	209	HasScalarAtomics(false),
Sam Kolton	3c4933f	2017-06-22 06:26:41 +0000	[diff] [blame]	210	HasSDWAOmod(false),
				211	HasSDWAScalar(false),
				212	HasSDWASdst(false),
				213	HasSDWAMac(false),
Sam Kolton	a179d25	2017-06-27 15:02:23 +0000	[diff] [blame]	214	HasSDWAOutModsVOPC(false),
Sam Kolton	07dbde2	2017-01-20 10:01:25 +0000	[diff] [blame]	215	HasDPP(false),
Ryan Taylor	1f334d0	2018-08-28 15:07:30 +0000	[diff] [blame]	216	HasR128A16(false),
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame^]	217	HasNSAEncoding(false),
Matt Arsenault	0084adc	2018-04-30 19:08:16 +0000	[diff] [blame]	218	HasDLInsts(false),
Stanislav Mekhanoshin	0e858b0	2019-02-09 00:34:21 +0000	[diff] [blame]	219	HasDot1Insts(false),
				220	HasDot2Insts(false),
Konstantin Zhuravlyov	108927b	2018-11-05 22:44:19 +0000	[diff] [blame]	221	EnableSRAMECC(false),
Matt Arsenault	f426ddb	2019-04-03 01:58:57 +0000	[diff] [blame]	222	DoesNotSupportSRAMECC(false),
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame^]	223	HasNoSdstCMPX(false),
				224	HasVscnt(false),
				225	HasRegisterBanking(false),
				226	HasVOP3Literal(false),
				227	HasNoDataDepHazard(false),
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	228	FlatAddressSpace(false),
Matt Arsenault	acdc765	2017-05-10 21:19:05 +0000	[diff] [blame]	229	FlatInstOffsets(false),
				230	FlatGlobalInsts(false),
				231	FlatScratchInsts(false),
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame^]	232	ScalarFlatScratchInsts(false),
Matt Arsenault	c37fe66	2017-07-20 17:42:47 +0000	[diff] [blame]	233	AddNoCarryInsts(false),
Changpeng Fang	44dfa1d	2018-01-12 21:12:19 +0000	[diff] [blame]	234	HasUnpackedD16VMem(false),
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame^]	235	LDSMisalignedBug(false),
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	236
Alexander Timofeev	1800956	2016-12-08 17:28:47 +0000	[diff] [blame]	237	ScalarizeGlobal(false),
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	238
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame^]	239	HasVcmpxPermlaneHazard(false),
				240	HasVMEMtoScalarWriteHazard(false),
				241	HasSMEMtoVectorWriteHazard(false),
				242	HasInstFwdPrefetchBug(false),
				243	HasVcmpxExecWARHazard(false),
				244	HasLdsBranchVmemWARHazard(false),
				245	HasNSAtoVMEMBug(false),
				246	HasFlatSegmentOffsetBug(false),
				247
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	248	FeatureDisable(false),
Tom Stellard	752ddbd	2018-07-11 22:15:15 +0000	[diff] [blame]	249	InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
Matt Arsenault	4bec7d4	2018-07-20 09:05:08 +0000	[diff] [blame]	250	TLInfo(TM, *this),
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	251	FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	252	CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
				253	Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
				254	RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
				255	InstSelector.reset(new AMDGPUInstructionSelector(
				256	this, static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
Tom Stellard	a40f971	2014-01-22 21:55:43 +0000	[diff] [blame]	257	}
Tom Stellard	b8fd6ef	2014-12-02 22:00:07 +0000	[diff] [blame]	258
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	259	unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
Stanislav Mekhanoshin	2b913b1	2017-02-01 22:59:50 +0000	[diff] [blame]	260	const Function &F) const {
				261	if (NWaves == 1)
Matt Arsenault	8a028bf	2016-05-16 21:19:59 +0000	[diff] [blame]	262	return getLocalMemorySize();
Stanislav Mekhanoshin	2b913b1	2017-02-01 22:59:50 +0000	[diff] [blame]	263	unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
				264	unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame^]	265	if (!WorkGroupsPerCu)
				266	return 0;
Stanislav Mekhanoshin	2b913b1	2017-02-01 22:59:50 +0000	[diff] [blame]	267	unsigned MaxWaves = getMaxWavesPerEU();
				268	return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
Matt Arsenault	8a028bf	2016-05-16 21:19:59 +0000	[diff] [blame]	269	}
				270
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	271	unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
Stanislav Mekhanoshin	2b913b1	2017-02-01 22:59:50 +0000	[diff] [blame]	272	const Function &F) const {
				273	unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
				274	unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame^]	275	if (!WorkGroupsPerCu)
				276	return 0;
Stanislav Mekhanoshin	2b913b1	2017-02-01 22:59:50 +0000	[diff] [blame]	277	unsigned MaxWaves = getMaxWavesPerEU();
				278	unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
				279	unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
				280	NumWaves = std::min(NumWaves, MaxWaves);
				281	NumWaves = std::max(NumWaves, 1u);
				282	return NumWaves;
Matt Arsenault	8a028bf	2016-05-16 21:19:59 +0000	[diff] [blame]	283	}
				284
Tom Stellard	44b30b4	2018-05-22 02:03:23 +0000	[diff] [blame]	285	unsigned
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	286	AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
Tom Stellard	44b30b4	2018-05-22 02:03:23 +0000	[diff] [blame]	287	const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
				288	return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
				289	}
				290
Matt Arsenault	b791802	2017-10-23 17:09:35 +0000	[diff] [blame]	291	std::pair<unsigned, unsigned>
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	292	AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
Matt Arsenault	b791802	2017-10-23 17:09:35 +0000	[diff] [blame]	293	switch (CC) {
				294	case CallingConv::AMDGPU_CS:
				295	case CallingConv::AMDGPU_KERNEL:
				296	case CallingConv::SPIR_KERNEL:
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame^]	297	return std::make_pair(getWavefrontSize() * 2,
				298	std::max(getWavefrontSize() * 4, 256u));
Matt Arsenault	b791802	2017-10-23 17:09:35 +0000	[diff] [blame]	299	case CallingConv::AMDGPU_VS:
				300	case CallingConv::AMDGPU_LS:
				301	case CallingConv::AMDGPU_HS:
				302	case CallingConv::AMDGPU_ES:
				303	case CallingConv::AMDGPU_GS:
				304	case CallingConv::AMDGPU_PS:
				305	return std::make_pair(1, getWavefrontSize());
				306	default:
				307	return std::make_pair(1, 16 * getWavefrontSize());
				308	}
				309	}
				310
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	311	std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
Konstantin Zhuravlyov	1d65026	2016-09-06 20:22:28 +0000	[diff] [blame]	312	const Function &F) const {
Matt Arsenault	b791802	2017-10-23 17:09:35 +0000	[diff] [blame]	313	// FIXME: 1024 if function.
Konstantin Zhuravlyov	1d65026	2016-09-06 20:22:28 +0000	[diff] [blame]	314	// Default minimum/maximum flat work group sizes.
				315	std::pair<unsigned, unsigned> Default =
Matt Arsenault	b791802	2017-10-23 17:09:35 +0000	[diff] [blame]	316	getDefaultFlatWorkGroupSize(F.getCallingConv());
Konstantin Zhuravlyov	1d65026	2016-09-06 20:22:28 +0000	[diff] [blame]	317
				318	// TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
				319	// starts using "amdgpu-flat-work-group-size" attribute.
				320	Default.second = AMDGPU::getIntegerAttribute(
				321	F, "amdgpu-max-work-group-size", Default.second);
				322	Default.first = std::min(Default.first, Default.second);
				323
				324	// Requested minimum/maximum flat work group sizes.
				325	std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
				326	F, "amdgpu-flat-work-group-size", Default);
				327
				328	// Make sure requested minimum is less than requested maximum.
				329	if (Requested.first > Requested.second)
				330	return Default;
				331
				332	// Make sure requested values do not violate subtarget's specifications.
				333	if (Requested.first < getMinFlatWorkGroupSize())
				334	return Default;
				335	if (Requested.second > getMaxFlatWorkGroupSize())
				336	return Default;
				337
				338	return Requested;
				339	}
				340
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	341	std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
Konstantin Zhuravlyov	1d65026	2016-09-06 20:22:28 +0000	[diff] [blame]	342	const Function &F) const {
Konstantin Zhuravlyov	1d65026	2016-09-06 20:22:28 +0000	[diff] [blame]	343	// Default minimum/maximum number of waves per execution unit.
Konstantin Zhuravlyov	fd87137	2017-02-09 21:33:23 +0000	[diff] [blame]	344	std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
Konstantin Zhuravlyov	1d65026	2016-09-06 20:22:28 +0000	[diff] [blame]	345
				346	// Default/requested minimum/maximum flat work group sizes.
				347	std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
				348
				349	// If minimum/maximum flat work group sizes were explicitly requested using
				350	// "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
				351	// number of waves per execution unit to values implied by requested
				352	// minimum/maximum flat work group sizes.
				353	unsigned MinImpliedByFlatWorkGroupSize =
				354	getMaxWavesPerEU(FlatWorkGroupSizes.second);
				355	bool RequestedFlatWorkGroupSize = false;
				356
				357	// TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
				358	// starts using "amdgpu-flat-work-group-size" attribute.
				359	if (F.hasFnAttribute("amdgpu-max-work-group-size") \|\|
				360	F.hasFnAttribute("amdgpu-flat-work-group-size")) {
				361	Default.first = MinImpliedByFlatWorkGroupSize;
				362	RequestedFlatWorkGroupSize = true;
				363	}
				364
				365	// Requested minimum/maximum number of waves per execution unit.
				366	std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
				367	F, "amdgpu-waves-per-eu", Default, true);
				368
				369	// Make sure requested minimum is less than requested maximum.
				370	if (Requested.second && Requested.first > Requested.second)
				371	return Default;
				372
				373	// Make sure requested values do not violate subtarget's specifications.
				374	if (Requested.first < getMinWavesPerEU() \|\|
				375	Requested.first > getMaxWavesPerEU())
				376	return Default;
				377	if (Requested.second > getMaxWavesPerEU())
				378	return Default;
				379
				380	// Make sure requested values are compatible with values implied by requested
				381	// minimum/maximum flat work group sizes.
				382	if (RequestedFlatWorkGroupSize &&
Konstantin Zhuravlyov	2ec725c	2017-07-16 19:38:47 +0000	[diff] [blame]	383	Requested.first < MinImpliedByFlatWorkGroupSize)
Konstantin Zhuravlyov	1d65026	2016-09-06 20:22:28 +0000	[diff] [blame]	384	return Default;
				385
				386	return Requested;
				387	}
				388
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	389	bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
Stanislav Mekhanoshin	c90347d	2017-04-12 20:48:56 +0000	[diff] [blame]	390	Function *Kernel = I->getParent()->getParent();
				391	unsigned MinSize = 0;
				392	unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
				393	bool IdQuery = false;
				394
				395	// If reqd_work_group_size is present it narrows value down.
				396	if (auto *CI = dyn_cast<CallInst>(I)) {
				397	const Function *F = CI->getCalledFunction();
				398	if (F) {
				399	unsigned Dim = UINT_MAX;
				400	switch (F->getIntrinsicID()) {
				401	case Intrinsic::amdgcn_workitem_id_x:
				402	case Intrinsic::r600_read_tidig_x:
				403	IdQuery = true;
Simon Pilgrim	0f5b350	2017-07-07 10:18:57 +0000	[diff] [blame]	404	LLVM_FALLTHROUGH;
Stanislav Mekhanoshin	c90347d	2017-04-12 20:48:56 +0000	[diff] [blame]	405	case Intrinsic::r600_read_local_size_x:
				406	Dim = 0;
				407	break;
				408	case Intrinsic::amdgcn_workitem_id_y:
				409	case Intrinsic::r600_read_tidig_y:
				410	IdQuery = true;
Simon Pilgrim	0f5b350	2017-07-07 10:18:57 +0000	[diff] [blame]	411	LLVM_FALLTHROUGH;
Stanislav Mekhanoshin	c90347d	2017-04-12 20:48:56 +0000	[diff] [blame]	412	case Intrinsic::r600_read_local_size_y:
				413	Dim = 1;
				414	break;
				415	case Intrinsic::amdgcn_workitem_id_z:
				416	case Intrinsic::r600_read_tidig_z:
				417	IdQuery = true;
Simon Pilgrim	0f5b350	2017-07-07 10:18:57 +0000	[diff] [blame]	418	LLVM_FALLTHROUGH;
Stanislav Mekhanoshin	c90347d	2017-04-12 20:48:56 +0000	[diff] [blame]	419	case Intrinsic::r600_read_local_size_z:
				420	Dim = 2;
				421	break;
				422	default:
				423	break;
				424	}
				425	if (Dim <= 3) {
				426	if (auto Node = Kernel->getMetadata("reqd_work_group_size"))
				427	if (Node->getNumOperands() == 3)
				428	MinSize = MaxSize = mdconst::extract<ConstantInt>(
				429	Node->getOperand(Dim))->getZExtValue();
				430	}
				431	}
				432	}
				433
				434	if (!MaxSize)
				435	return false;
				436
				437	// Range metadata is [Lo, Hi). For ID query we need to pass max size
				438	// as Hi. For size query we need to pass Hi + 1.
				439	if (IdQuery)
				440	MinSize = 0;
				441	else
				442	++MaxSize;
				443
				444	MDBuilder MDB(I->getContext());
				445	MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
				446	APInt(32, MaxSize));
				447	I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
				448	return true;
				449	}
				450
Matt Arsenault	4bec7d4	2018-07-20 09:05:08 +0000	[diff] [blame]	451	uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
				452	unsigned &MaxAlign) const {
				453	assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL \|\|
				454	F.getCallingConv() == CallingConv::SPIR_KERNEL);
				455
				456	const DataLayout &DL = F.getParent()->getDataLayout();
				457	uint64_t ExplicitArgBytes = 0;
				458	MaxAlign = 1;
				459
				460	for (const Argument &Arg : F.args()) {
				461	Type *ArgTy = Arg.getType();
				462
				463	unsigned Align = DL.getABITypeAlignment(ArgTy);
				464	uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
				465	ExplicitArgBytes = alignTo(ExplicitArgBytes, Align) + AllocSize;
				466	MaxAlign = std::max(MaxAlign, Align);
				467	}
				468
				469	return ExplicitArgBytes;
				470	}
				471
				472	unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
				473	unsigned &MaxAlign) const {
				474	uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);
				475
				476	unsigned ExplicitOffset = getExplicitKernelArgOffset(F);
				477
				478	uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
				479	unsigned ImplicitBytes = getImplicitArgNumBytes(F);
				480	if (ImplicitBytes != 0) {
				481	unsigned Alignment = getAlignmentForImplicitArgPtr();
				482	TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
				483	}
				484
				485	// Being able to dereference past the end is useful for emitting scalar loads.
				486	return alignTo(TotalSize, 4);
				487	}
				488
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	489	R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
				490	const TargetMachine &TM) :
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	491	R600GenSubtargetInfo(TT, GPU, FS),
Konstantin Zhuravlyov	71e43ee	2018-09-12 18:50:47 +0000	[diff] [blame]	492	AMDGPUSubtarget(TT),
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	493	InstrInfo(*this),
				494	FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	495	FMA(false),
				496	CaymanISA(false),
				497	CFALUBug(false),
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	498	HasVertexCache(false),
				499	R600ALUInst(false),
				500	FP64(false),
				501	TexVTXClauseSize(0),
				502	Gen(R600),
				503	TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
Matt Arsenault	0da6350	2018-08-31 05:49:54 +0000	[diff] [blame]	504	InstrItins(getInstrItineraryForCPU(GPU)) { }
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	505
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	506	void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Matt Arsenault	55dff27	2016-06-28 00:11:26 +0000	[diff] [blame]	507	unsigned NumRegionInstrs) const {
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	508	// Track register pressure so the scheduler can try to decrease
				509	// pressure once register usage is above the threshold defined by
				510	// SIRegisterInfo::getRegPressureSetLimit()
				511	Policy.ShouldTrackPressure = true;
Tom Stellard	83f0bce	2015-01-29 16:55:25 +0000	[diff] [blame]	512
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	513	// Enabling both top down and bottom up scheduling seems to give us less
				514	// register spills than just using one of these approaches on its own.
				515	Policy.OnlyTopDown = false;
				516	Policy.OnlyBottomUp = false;
Tom Stellard	83f0bce	2015-01-29 16:55:25 +0000	[diff] [blame]	517
Alexander Timofeev	9f61fea	2017-02-14 14:29:05 +0000	[diff] [blame]	518	// Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
				519	if (!enableSIScheduler())
				520	Policy.ShouldTrackLaneMasks = true;
Matt Arsenault	43e92fe	2016-06-24 06:30:11 +0000	[diff] [blame]	521	}
Tom Stellard	0bc954e	2016-03-30 16:35:09 +0000	[diff] [blame]	522
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame^]	523	bool GCNSubtarget::hasMadF16() const {
				524	return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16) != -1;
				525	}
				526
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	527	unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame^]	528	if (getGeneration() >= AMDGPUSubtarget::GFX10)
				529	return 10;
				530
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	531	if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Tom Stellard	0d23ebe	2016-08-29 19:42:52 +0000	[diff] [blame]	532	if (SGPRs <= 80)
				533	return 10;
				534	if (SGPRs <= 88)
				535	return 9;
				536	if (SGPRs <= 100)
				537	return 8;
				538	return 7;
				539	}
				540	if (SGPRs <= 48)
				541	return 10;
				542	if (SGPRs <= 56)
				543	return 9;
				544	if (SGPRs <= 64)
				545	return 8;
				546	if (SGPRs <= 72)
				547	return 7;
				548	if (SGPRs <= 80)
				549	return 6;
				550	return 5;
				551	}
				552
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	553	unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
Tom Stellard	0d23ebe	2016-08-29 19:42:52 +0000	[diff] [blame]	554	if (VGPRs <= 24)
				555	return 10;
				556	if (VGPRs <= 28)
				557	return 9;
				558	if (VGPRs <= 32)
				559	return 8;
				560	if (VGPRs <= 36)
				561	return 7;
				562	if (VGPRs <= 40)
				563	return 6;
				564	if (VGPRs <= 48)
				565	return 5;
				566	if (VGPRs <= 64)
				567	return 4;
				568	if (VGPRs <= 84)
				569	return 3;
				570	if (VGPRs <= 128)
				571	return 2;
				572	return 1;
				573	}
Matt Arsenault	4eae301	2016-10-28 20:31:47 +0000	[diff] [blame]	574
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	575	unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
Konstantin Zhuravlyov	e03b1d7	2017-02-08 13:02:33 +0000	[diff] [blame]	576	const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame^]	577	if (getGeneration() >= AMDGPUSubtarget::GFX10)
				578	return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.
				579
Konstantin Zhuravlyov	e03b1d7	2017-02-08 13:02:33 +0000	[diff] [blame]	580	if (MFI.hasFlatScratchInit()) {
				581	if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
				582	return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
				583	if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
				584	return 4; // FLAT_SCRATCH, VCC (in that order).
				585	}
				586
				587	if (isXNACKEnabled())
				588	return 4; // XNACK, VCC (in that order).
				589	return 2; // VCC.
				590	}
				591
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	592	unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
Matthias Braun	f1caa28	2017-12-15 22:22:58 +0000	[diff] [blame]	593	const Function &F = MF.getFunction();
Konstantin Zhuravlyov	e03b1d7	2017-02-08 13:02:33 +0000	[diff] [blame]	594	const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
				595
				596	// Compute maximum number of SGPRs function can use using default/requested
				597	// minimum number of waves per execution unit.
				598	std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
				599	unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
				600	unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);
				601
				602	// Check if maximum number of SGPRs was explicitly requested using
				603	// "amdgpu-num-sgpr" attribute.
				604	if (F.hasFnAttribute("amdgpu-num-sgpr")) {
				605	unsigned Requested = AMDGPU::getIntegerAttribute(
				606	F, "amdgpu-num-sgpr", MaxNumSGPRs);
				607
				608	// Make sure requested value does not violate subtarget's specifications.
				609	if (Requested && (Requested <= getReservedNumSGPRs(MF)))
				610	Requested = 0;
				611
				612	// If more SGPRs are required to support the input user/system SGPRs,
				613	// increase to accommodate them.
				614	//
				615	// FIXME: This really ends up using the requested number of SGPRs + number
				616	// of reserved special registers in total. Theoretically you could re-use
				617	// the last input registers for these special registers, but this would
				618	// require a lot of complexity to deal with the weird aliasing.
				619	unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
				620	if (Requested && Requested < InputNumSGPRs)
				621	Requested = InputNumSGPRs;
				622
				623	// Make sure requested value is compatible with values implied by
				624	// default/requested minimum/maximum number of waves per execution unit.
				625	if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
				626	Requested = 0;
				627	if (WavesPerEU.second &&
				628	Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
				629	Requested = 0;
				630
				631	if (Requested)
				632	MaxNumSGPRs = Requested;
				633	}
				634
Matt Arsenault	4eae301	2016-10-28 20:31:47 +0000	[diff] [blame]	635	if (hasSGPRInitBug())
Konstantin Zhuravlyov	9f89ede	2017-02-08 14:05:23 +0000	[diff] [blame]	636	MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
Matt Arsenault	4eae301	2016-10-28 20:31:47 +0000	[diff] [blame]	637
Konstantin Zhuravlyov	e03b1d7	2017-02-08 13:02:33 +0000	[diff] [blame]	638	return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
				639	MaxAddressableNumSGPRs);
				640	}
Matt Arsenault	4eae301	2016-10-28 20:31:47 +0000	[diff] [blame]	641
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	642	unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
Matthias Braun	f1caa28	2017-12-15 22:22:58 +0000	[diff] [blame]	643	const Function &F = MF.getFunction();
Konstantin Zhuravlyov	e03b1d7	2017-02-08 13:02:33 +0000	[diff] [blame]	644	const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
				645
				646	// Compute maximum number of VGPRs function can use using default/requested
				647	// minimum number of waves per execution unit.
				648	std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
				649	unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
				650
				651	// Check if maximum number of VGPRs was explicitly requested using
				652	// "amdgpu-num-vgpr" attribute.
				653	if (F.hasFnAttribute("amdgpu-num-vgpr")) {
				654	unsigned Requested = AMDGPU::getIntegerAttribute(
				655	F, "amdgpu-num-vgpr", MaxNumVGPRs);
				656
Konstantin Zhuravlyov	e03b1d7	2017-02-08 13:02:33 +0000	[diff] [blame]	657	// Make sure requested value is compatible with values implied by
				658	// default/requested minimum/maximum number of waves per execution unit.
				659	if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
				660	Requested = 0;
				661	if (WavesPerEU.second &&
				662	Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
				663	Requested = 0;
				664
				665	if (Requested)
				666	MaxNumVGPRs = Requested;
				667	}
				668
Konstantin Zhuravlyov	e004b3d	2018-06-21 20:28:19 +0000	[diff] [blame]	669	return MaxNumVGPRs;
Matt Arsenault	4eae301	2016-10-28 20:31:47 +0000	[diff] [blame]	670	}
Stanislav Mekhanoshin	d4ae470	2017-09-19 20:54:38 +0000	[diff] [blame]	671
Benjamin Kramer	f9ab3dd	2017-10-31 23:21:30 +0000	[diff] [blame]	672	namespace {
Stanislav Mekhanoshin	d4ae470	2017-09-19 20:54:38 +0000	[diff] [blame]	673	struct MemOpClusterMutation : ScheduleDAGMutation {
				674	const SIInstrInfo *TII;
				675
				676	MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {}
				677
Clement Courbet	b70355f	2019-03-29 08:33:05 +0000	[diff] [blame]	678	void apply(ScheduleDAGInstrs *DAG) override {
Stanislav Mekhanoshin	d4ae470	2017-09-19 20:54:38 +0000	[diff] [blame]	679	SUnit *SUa = nullptr;
				680	// Search for two consequent memory operations and link them
				681	// to prevent scheduler from moving them apart.
				682	// In DAG pre-process SUnits are in the original order of
				683	// the instructions before scheduling.
				684	for (SUnit &SU : DAG->SUnits) {
				685	MachineInstr &MI2 = *SU.getInstr();
				686	if (!MI2.mayLoad() && !MI2.mayStore()) {
				687	SUa = nullptr;
				688	continue;
				689	}
				690	if (!SUa) {
				691	SUa = &SU;
				692	continue;
				693	}
				694
				695	MachineInstr &MI1 = *SUa->getInstr();
				696	if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) \|\|
				697	(TII->isFLAT(MI1) && TII->isFLAT(MI2)) \|\|
				698	(TII->isSMRD(MI1) && TII->isSMRD(MI2)) \|\|
				699	(TII->isDS(MI1) && TII->isDS(MI2))) {
				700	SU.addPredBarrier(SUa);
				701
				702	for (const SDep &SI : SU.Preds) {
				703	if (SI.getSUnit() != SUa)
				704	SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial));
				705	}
				706
				707	if (&SU != &DAG->ExitSU) {
				708	for (const SDep &SI : SUa->Succs) {
				709	if (SI.getSUnit() != &SU)
				710	SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial));
				711	}
				712	}
				713	}
				714
				715	SUa = &SU;
				716	}
				717	}
				718	};
Benjamin Kramer	f9ab3dd	2017-10-31 23:21:30 +0000	[diff] [blame]	719	} // namespace
Stanislav Mekhanoshin	d4ae470	2017-09-19 20:54:38 +0000	[diff] [blame]	720
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	721	void GCNSubtarget::getPostRAMutations(
Stanislav Mekhanoshin	d4ae470	2017-09-19 20:54:38 +0000	[diff] [blame]	722	std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
				723	Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
				724	}
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	725
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	726	const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	727	if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	728	return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	729	else
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	730	return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<R600Subtarget>());
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	731	}
				732
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	733	const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM, const Function &F) {
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	734	if (TM.getTargetTriple().getArch() == Triple::amdgcn)
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	735	return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<GCNSubtarget>(F));
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	736	else
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	737	return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	738	}