blob: b15348d404a383c3d6699ffd5b235e1fa31efa36 [file] [log] [blame]
//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file declares a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//
17
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
Chandler Carruth93dcdc42015-01-31 11:17:59 +000033
34namespace llvm {
Eugene Zelenkod16eff82017-08-08 23:53:55 +000035
Matt Arsenault96518132016-03-25 01:00:32 +000036class AMDGPUTargetLowering;
Eugene Zelenkod16eff82017-08-08 23:53:55 +000037class Loop;
38class ScalarEvolution;
39class Type;
40class Value;
Chandler Carruth93dcdc42015-01-31 11:17:59 +000041
Matt Arsenault6b6a2c32016-03-11 08:00:27 +000042class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
Eugene Zelenkod16eff82017-08-08 23:53:55 +000043 using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
44 using TTI = TargetTransformInfo;
45
Chandler Carruthc340ca82015-02-01 14:01:15 +000046 friend BaseT;
Chandler Carruth93dcdc42015-01-31 11:17:59 +000047
Tom Stellardc5a154d2018-06-28 23:47:12 +000048 Triple TargetTriple;
Tom Stellardc7624312018-05-30 22:55:35 +000049
50public:
51 explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
52 : BaseT(TM, F.getParent()->getDataLayout()),
Tom Stellardc5a154d2018-06-28 23:47:12 +000053 TargetTriple(TM->getTargetTriple()) {}
Tom Stellardc7624312018-05-30 22:55:35 +000054
55 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
56 TTI::UnrollingPreferences &UP);
57};
58
59class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
60 using BaseT = BasicTTIImplBase<GCNTTIImpl>;
61 using TTI = TargetTransformInfo;
62
63 friend BaseT;
64
65 const AMDGPUSubtarget *ST;
66 const AMDGPUTargetLowering *TLI;
67 AMDGPUTTIImpl CommonTTI;
Matt Arsenaultb6491cc2017-01-31 01:20:54 +000068 bool IsGraphicsShader;
Chandler Carruthc340ca82015-02-01 14:01:15 +000069
Matt Arsenaultaac47c12017-08-07 17:08:44 +000070 const FeatureBitset InlineFeatureIgnoreList = {
71 // Codegen control options which don't matter.
72 AMDGPU::FeatureEnableLoadStoreOpt,
73 AMDGPU::FeatureEnableSIScheduler,
74 AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
75 AMDGPU::FeatureFlatForGlobal,
76 AMDGPU::FeaturePromoteAlloca,
77 AMDGPU::FeatureUnalignedBufferAccess,
78 AMDGPU::FeatureUnalignedScratchAccess,
79
80 AMDGPU::FeatureAutoWaitcntBeforeBarrier,
81 AMDGPU::FeatureDebuggerEmitPrologue,
82 AMDGPU::FeatureDebuggerInsertNops,
Matt Arsenaultaac47c12017-08-07 17:08:44 +000083
84 // Property of the kernel/environment which can't actually differ.
85 AMDGPU::FeatureSGPRInitBug,
86 AMDGPU::FeatureXNACK,
87 AMDGPU::FeatureTrapHandler,
88
89 // Perf-tuning features
90 AMDGPU::FeatureFastFMAF32,
91 AMDGPU::HalfRate64Ops
92 };
93
Chandler Carruthc956ab662015-02-01 14:22:17 +000094 const AMDGPUSubtarget *getST() const { return ST; }
Chandler Carruthc340ca82015-02-01 14:01:15 +000095 const AMDGPUTargetLowering *getTLI() const { return TLI; }
Chandler Carruth93dcdc42015-01-31 11:17:59 +000096
Matt Arsenault96518132016-03-25 01:00:32 +000097 static inline int getFullRateInstrCost() {
98 return TargetTransformInfo::TCC_Basic;
99 }
100
101 static inline int getHalfRateInstrCost() {
102 return 2 * TargetTransformInfo::TCC_Basic;
103 }
104
105 // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
106 // should be 2 or 4.
107 static inline int getQuarterRateInstrCost() {
108 return 3 * TargetTransformInfo::TCC_Basic;
109 }
110
111 // On some parts, normal fp64 operations are half rate, and others
112 // quarter. This also applies to some integer operations.
113 inline int get64BitInstrCost() const {
114 return ST->hasHalfRate64Ops() ?
115 getHalfRateInstrCost() : getQuarterRateInstrCost();
116 }
117
Chandler Carruth93dcdc42015-01-31 11:17:59 +0000118public:
Tom Stellardc7624312018-05-30 22:55:35 +0000119 explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
Matt Arsenault59c0ffa2016-06-27 20:48:03 +0000120 : BaseT(TM, F.getParent()->getDataLayout()),
Tom Stellardc5a154d2018-06-28 23:47:12 +0000121 ST(static_cast<const AMDGPUSubtarget*>(TM->getSubtargetImpl(F))),
Matt Arsenaultb6491cc2017-01-31 01:20:54 +0000122 TLI(ST->getTargetLowering()),
Tom Stellardc7624312018-05-30 22:55:35 +0000123 CommonTTI(TM, F),
Matt Arsenaultb6491cc2017-01-31 01:20:54 +0000124 IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
Chandler Carruth93dcdc42015-01-31 11:17:59 +0000125
Chandler Carruth93dcdc42015-01-31 11:17:59 +0000126 bool hasBranchDivergence() { return true; }
127
Geoff Berry66d9bdb2017-06-28 15:53:17 +0000128 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
129 TTI::UnrollingPreferences &UP);
Chandler Carruth93dcdc42015-01-31 11:17:59 +0000130
131 TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
132 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
Matt Arsenault1735da42016-05-18 16:10:19 +0000133 return TTI::PSK_FastHardware;
Chandler Carruth93dcdc42015-01-31 11:17:59 +0000134 }
135
Matt Arsenault67cd3472017-06-20 20:38:06 +0000136 unsigned getHardwareNumberOfRegisters(bool Vector) const;
137 unsigned getNumberOfRegisters(bool Vector) const;
Eugene Zelenkod16eff82017-08-08 23:53:55 +0000138 unsigned getRegisterBitWidth(bool Vector) const;
Matt Arsenault67cd3472017-06-20 20:38:06 +0000139 unsigned getMinVectorRegisterBitWidth() const;
Farhana Aleen89196642018-03-07 17:09:18 +0000140 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
141 unsigned ChainSizeInBytes,
142 VectorType *VecTy) const;
143 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
144 unsigned ChainSizeInBytes,
145 VectorType *VecTy) const;
Volkan Keles1c386812016-10-03 10:31:34 +0000146 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
Matt Arsenaultf0a88db2017-02-23 03:58:53 +0000147
148 bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
149 unsigned Alignment,
150 unsigned AddrSpace) const;
151 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
152 unsigned Alignment,
153 unsigned AddrSpace) const;
154 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
155 unsigned Alignment,
156 unsigned AddrSpace) const;
157
Wei Mi062c7442015-05-06 17:12:25 +0000158 unsigned getMaxInterleaveFactor(unsigned VF);
Matt Arsenaulte830f542015-12-01 19:08:39 +0000159
Matt Arsenault3e268cc2017-12-11 21:38:43 +0000160 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
161
Matt Arsenault96518132016-03-25 01:00:32 +0000162 int getArithmeticInstrCost(
163 unsigned Opcode, Type *Ty,
164 TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
165 TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
166 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
Mohammed Agabaria2c96c432017-01-11 08:23:37 +0000167 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
168 ArrayRef<const Value *> Args = ArrayRef<const Value *>());
Matt Arsenault96518132016-03-25 01:00:32 +0000169
Matt Arsenaulte05ff152015-12-16 18:37:19 +0000170 unsigned getCFInstrCost(unsigned Opcode);
171
Matt Arsenaulte830f542015-12-01 19:08:39 +0000172 int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
Tom Stellarddbe374b2015-12-15 18:04:38 +0000173 bool isSourceOfDivergence(const Value *V) const;
Alexander Timofeev0f9c84c2017-06-15 19:33:10 +0000174 bool isAlwaysUniform(const Value *V) const;
Michael Kupersteinaa71bdd2016-07-06 17:30:56 +0000175
Matt Arsenaultb6491cc2017-01-31 01:20:54 +0000176 unsigned getFlatAddressSpace() const {
177 // Don't bother running InferAddressSpaces pass on graphics shaders which
178 // don't use flat addressing.
179 if (IsGraphicsShader)
180 return -1;
Matt Arsenault1575cb82017-01-31 23:48:37 +0000181 return ST->hasFlatAddressSpace() ?
Yaxun Liu1a14bfa2017-03-27 14:04:01 +0000182 ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
Matt Arsenaultb6491cc2017-01-31 01:20:54 +0000183 }
184
Michael Kupersteinaa71bdd2016-07-06 17:30:56 +0000185 unsigned getVectorSplitCost() { return 0; }
Matt Arsenault3c5e4232017-05-10 21:29:33 +0000186
187 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
188 Type *SubTp);
Matt Arsenaultaac47c12017-08-07 17:08:44 +0000189
190 bool areInlineCompatible(const Function *Caller,
191 const Function *Callee) const;
Stanislav Mekhanoshin5670e6d2017-09-20 04:25:58 +0000192
193 unsigned getInliningThresholdMultiplier() { return 9; }
Farhana Aleene2dfe8a2018-05-01 21:41:12 +0000194
195 int getArithmeticReductionCost(unsigned Opcode,
196 Type *Ty,
197 bool IsPairwise);
Farhana Aleene24f3ff2018-05-09 21:18:34 +0000198 int getMinMaxReductionCost(Type *Ty, Type *CondTy,
199 bool IsPairwiseForm,
200 bool IsUnsigned);
Chandler Carruth93dcdc42015-01-31 11:17:59 +0000201};
202
Tom Stellardc7624312018-05-30 22:55:35 +0000203class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
204 using BaseT = BasicTTIImplBase<R600TTIImpl>;
205 using TTI = TargetTransformInfo;
206
207 friend BaseT;
208
Tom Stellardc5a154d2018-06-28 23:47:12 +0000209 const R600Subtarget *ST;
Tom Stellardc7624312018-05-30 22:55:35 +0000210 const AMDGPUTargetLowering *TLI;
211 AMDGPUTTIImpl CommonTTI;
212
213public:
214 explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
215 : BaseT(TM, F.getParent()->getDataLayout()),
Tom Stellardc5a154d2018-06-28 23:47:12 +0000216 ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
Tom Stellardc7624312018-05-30 22:55:35 +0000217 TLI(ST->getTargetLowering()),
218 CommonTTI(TM, F) {}
219
Tom Stellardc5a154d2018-06-28 23:47:12 +0000220 const R600Subtarget *getST() const { return ST; }
Tom Stellardc7624312018-05-30 22:55:35 +0000221 const AMDGPUTargetLowering *getTLI() const { return TLI; }
222
223 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
224 TTI::UnrollingPreferences &UP);
225 unsigned getHardwareNumberOfRegisters(bool Vec) const;
226 unsigned getNumberOfRegisters(bool Vec) const;
227 unsigned getRegisterBitWidth(bool Vector) const;
228 unsigned getMinVectorRegisterBitWidth() const;
229 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
230 bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
231 unsigned AddrSpace) const;
232 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
233 unsigned Alignment,
234 unsigned AddrSpace) const;
235 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
236 unsigned Alignment,
237 unsigned AddrSpace) const;
238 unsigned getMaxInterleaveFactor(unsigned VF);
239 unsigned getCFInstrCost(unsigned Opcode);
240 int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
241};
242
Chandler Carruth93dcdc42015-01-31 11:17:59 +0000243} // end namespace llvm
244
Eugene Zelenkod16eff82017-08-08 23:53:55 +0000245#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H