blob: a5225f68af9ce405b0e3b5a062c543a5c58963ba [file] [log] [blame]
//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file declares a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//
17
Matt Arsenault6b6a2c32016-03-11 08:00:27 +000018#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
19#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
Chandler Carruth93dcdc42015-01-31 11:17:59 +000020
21#include "AMDGPU.h"
Eugene Zelenkod16eff82017-08-08 23:53:55 +000022#include "AMDGPUSubtarget.h"
Chandler Carruth93dcdc42015-01-31 11:17:59 +000023#include "AMDGPUTargetMachine.h"
Eugene Zelenkod16eff82017-08-08 23:53:55 +000024#include "Utils/AMDGPUBaseInfo.h"
25#include "llvm/ADT/ArrayRef.h"
Chandler Carruth93dcdc42015-01-31 11:17:59 +000026#include "llvm/Analysis/TargetTransformInfo.h"
27#include "llvm/CodeGen/BasicTTIImpl.h"
Eugene Zelenkod16eff82017-08-08 23:53:55 +000028#include "llvm/IR/Function.h"
29#include "llvm/MC/SubtargetFeature.h"
30#include "llvm/Support/MathExtras.h"
31#include <cassert>
Chandler Carruth93dcdc42015-01-31 11:17:59 +000032
33namespace llvm {
Eugene Zelenkod16eff82017-08-08 23:53:55 +000034
Matt Arsenault96518132016-03-25 01:00:32 +000035class AMDGPUTargetLowering;
Eugene Zelenkod16eff82017-08-08 23:53:55 +000036class Loop;
37class ScalarEvolution;
38class Type;
39class Value;
Chandler Carruth93dcdc42015-01-31 11:17:59 +000040
Matt Arsenault6b6a2c32016-03-11 08:00:27 +000041class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
Eugene Zelenkod16eff82017-08-08 23:53:55 +000042 using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
43 using TTI = TargetTransformInfo;
44
Chandler Carruthc340ca82015-02-01 14:01:15 +000045 friend BaseT;
Chandler Carruth93dcdc42015-01-31 11:17:59 +000046
47 const AMDGPUSubtarget *ST;
Chandler Carruthc340ca82015-02-01 14:01:15 +000048 const AMDGPUTargetLowering *TLI;
Matt Arsenaultb6491cc2017-01-31 01:20:54 +000049 bool IsGraphicsShader;
Chandler Carruthc340ca82015-02-01 14:01:15 +000050
Matt Arsenaultaac47c12017-08-07 17:08:44 +000051 const FeatureBitset InlineFeatureIgnoreList = {
52 // Codegen control options which don't matter.
53 AMDGPU::FeatureEnableLoadStoreOpt,
54 AMDGPU::FeatureEnableSIScheduler,
55 AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
56 AMDGPU::FeatureFlatForGlobal,
57 AMDGPU::FeaturePromoteAlloca,
58 AMDGPU::FeatureUnalignedBufferAccess,
59 AMDGPU::FeatureUnalignedScratchAccess,
60
61 AMDGPU::FeatureAutoWaitcntBeforeBarrier,
62 AMDGPU::FeatureDebuggerEmitPrologue,
63 AMDGPU::FeatureDebuggerInsertNops,
64 AMDGPU::FeatureDebuggerReserveRegs,
65
66 // Property of the kernel/environment which can't actually differ.
67 AMDGPU::FeatureSGPRInitBug,
68 AMDGPU::FeatureXNACK,
69 AMDGPU::FeatureTrapHandler,
70
71 // Perf-tuning features
72 AMDGPU::FeatureFastFMAF32,
73 AMDGPU::HalfRate64Ops
74 };
75
Chandler Carruthc956ab662015-02-01 14:22:17 +000076 const AMDGPUSubtarget *getST() const { return ST; }
Chandler Carruthc340ca82015-02-01 14:01:15 +000077 const AMDGPUTargetLowering *getTLI() const { return TLI; }
Chandler Carruth93dcdc42015-01-31 11:17:59 +000078
Matt Arsenault96518132016-03-25 01:00:32 +000079 static inline int getFullRateInstrCost() {
80 return TargetTransformInfo::TCC_Basic;
81 }
82
83 static inline int getHalfRateInstrCost() {
84 return 2 * TargetTransformInfo::TCC_Basic;
85 }
86
87 // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
88 // should be 2 or 4.
89 static inline int getQuarterRateInstrCost() {
90 return 3 * TargetTransformInfo::TCC_Basic;
91 }
92
93 // On some parts, normal fp64 operations are half rate, and others
94 // quarter. This also applies to some integer operations.
95 inline int get64BitInstrCost() const {
96 return ST->hasHalfRate64Ops() ?
97 getHalfRateInstrCost() : getQuarterRateInstrCost();
98 }
99
Chandler Carruth93dcdc42015-01-31 11:17:59 +0000100public:
Matt Arsenault59c0ffa2016-06-27 20:48:03 +0000101 explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
102 : BaseT(TM, F.getParent()->getDataLayout()),
103 ST(TM->getSubtargetImpl(F)),
Matt Arsenaultb6491cc2017-01-31 01:20:54 +0000104 TLI(ST->getTargetLowering()),
105 IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
Chandler Carruth93dcdc42015-01-31 11:17:59 +0000106
Chandler Carruth93dcdc42015-01-31 11:17:59 +0000107 bool hasBranchDivergence() { return true; }
108
Geoff Berry66d9bdb2017-06-28 15:53:17 +0000109 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
110 TTI::UnrollingPreferences &UP);
Chandler Carruth93dcdc42015-01-31 11:17:59 +0000111
112 TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
113 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
Matt Arsenault1735da42016-05-18 16:10:19 +0000114 return TTI::PSK_FastHardware;
Chandler Carruth93dcdc42015-01-31 11:17:59 +0000115 }
116
Matt Arsenault67cd3472017-06-20 20:38:06 +0000117 unsigned getHardwareNumberOfRegisters(bool Vector) const;
118 unsigned getNumberOfRegisters(bool Vector) const;
Eugene Zelenkod16eff82017-08-08 23:53:55 +0000119 unsigned getRegisterBitWidth(bool Vector) const;
Matt Arsenault67cd3472017-06-20 20:38:06 +0000120 unsigned getMinVectorRegisterBitWidth() const;
Volkan Keles1c386812016-10-03 10:31:34 +0000121 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
Matt Arsenaultf0a88db2017-02-23 03:58:53 +0000122
123 bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
124 unsigned Alignment,
125 unsigned AddrSpace) const;
126 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
127 unsigned Alignment,
128 unsigned AddrSpace) const;
129 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
130 unsigned Alignment,
131 unsigned AddrSpace) const;
132
Wei Mi062c7442015-05-06 17:12:25 +0000133 unsigned getMaxInterleaveFactor(unsigned VF);
Matt Arsenaulte830f542015-12-01 19:08:39 +0000134
Matt Arsenault96518132016-03-25 01:00:32 +0000135 int getArithmeticInstrCost(
136 unsigned Opcode, Type *Ty,
137 TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
138 TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
139 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
Mohammed Agabaria2c96c432017-01-11 08:23:37 +0000140 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
141 ArrayRef<const Value *> Args = ArrayRef<const Value *>());
Matt Arsenault96518132016-03-25 01:00:32 +0000142
Matt Arsenaulte05ff152015-12-16 18:37:19 +0000143 unsigned getCFInstrCost(unsigned Opcode);
144
Matt Arsenaulte830f542015-12-01 19:08:39 +0000145 int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
Tom Stellarddbe374b2015-12-15 18:04:38 +0000146 bool isSourceOfDivergence(const Value *V) const;
Alexander Timofeev0f9c84c2017-06-15 19:33:10 +0000147 bool isAlwaysUniform(const Value *V) const;
Michael Kupersteinaa71bdd2016-07-06 17:30:56 +0000148
Matt Arsenaultb6491cc2017-01-31 01:20:54 +0000149 unsigned getFlatAddressSpace() const {
150 // Don't bother running InferAddressSpaces pass on graphics shaders which
151 // don't use flat addressing.
152 if (IsGraphicsShader)
153 return -1;
Matt Arsenault1575cb82017-01-31 23:48:37 +0000154 return ST->hasFlatAddressSpace() ?
Yaxun Liu1a14bfa2017-03-27 14:04:01 +0000155 ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
Matt Arsenaultb6491cc2017-01-31 01:20:54 +0000156 }
157
Michael Kupersteinaa71bdd2016-07-06 17:30:56 +0000158 unsigned getVectorSplitCost() { return 0; }
Matt Arsenault3c5e4232017-05-10 21:29:33 +0000159
160 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
161 Type *SubTp);
Matt Arsenaultaac47c12017-08-07 17:08:44 +0000162
163 bool areInlineCompatible(const Function *Caller,
164 const Function *Callee) const;
Chandler Carruth93dcdc42015-01-31 11:17:59 +0000165};
166
167} // end namespace llvm
168
Eugene Zelenkod16eff82017-08-08 23:53:55 +0000169#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H