//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      TargetTriple(TM->getTargetTriple()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

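  // Features that should not affect the decision to inline a callee into a
  // caller. A sketch of the intended use (the actual check lives in
  // areInlineCompatible() in the corresponding .cpp file): these bits are
  // masked off both the caller's and the callee's feature sets before the
  // sets are compared, so a mismatch in them alone does not block inlining.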
  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,
    AMDGPU::FeatureCodeObjectV3,

    // The default assumption needs to be that ECC is enabled, but no
    // directly exposed operations depend on it, so it can be safely
    // inlined.
    AMDGPU::FeatureSRAMECC,

    // Perf-tuning features
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };

  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
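  // For example (illustrative only): with HalfRate64Ops set, an fp64 add is
  // costed at 2 * TCC_Basic by the helper below; without it, the
  // quarter-rate cost of 3 * TCC_Basic applies.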
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
    unsigned Opcode, Type *Ty,
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running the InferAddressSpaces pass on graphics shaders,
    // which don't use flat addressing.
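    // Note: the -1 below wraps around to ~0u in the unsigned return type,
    // which callers interpret as the absence of a flat address space.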
    if (IsGraphicsShader)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

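  // Function calls are comparatively expensive on AMDGPU, so inlining is
  // favored aggressively: the generic inline threshold is scaled up by this
  // multiplier.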
  unsigned getInliningThresholdMultiplier() { return 9; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
};

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H