//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      TargetTriple(TM->getTargetTriple()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

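  // Features that should not affect the decision to inline a callee into a
  // caller. A sketch of the intended use (the actual check lives in
  // areInlineCompatible() in the corresponding .cpp file): these bits are
  // masked off both the caller's and the callee's feature sets before the
  // sets are compared, so a mismatch in them alone does not block inlining.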
  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,
    AMDGPU::FeatureCodeObjectV3,

    // The default assumption needs to be that ECC is enabled, but no
    // directly exposed operations depend on it, so it can be safely
    // inlined.
    AMDGPU::FeatureSRAMECC,

    // Perf-tuning features
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };

  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
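  // For example (illustrative only): with HalfRate64Ops set, an fp64 add is
  // costed at 2 * TCC_Basic by the helper below; without it, the
  // quarter-rate cost of 3 * TCC_Basic applies.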
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
    unsigned Opcode, Type *Ty,
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running the InferAddressSpaces pass on graphics shaders,
    // which don't use flat addressing.
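    // Note: the -1 below wraps around to ~0u in the unsigned return type,
    // which callers interpret as the absence of a flat address space.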
    if (IsGraphicsShader)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

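  // Function calls are comparatively expensive on AMDGPU, so inlining is
  // favored aggressively: the generic inline threshold is scaled up by this
  // multiplier.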
  unsigned getInliningThresholdMultiplier() { return 9; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
};

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H