//===-- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file declares a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"

namespace llvm {
class AMDGPUTargetLowering;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  typedef BasicTTIImplBase<AMDGPUTTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const AMDGPUSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  bool IsGraphicsShader;

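  // Subtarget features that are not expected to affect whether a callee can
  // be inlined into a caller; differences in these bits are ignored by
  // areInlineCompatible() below.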
  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,
    AMDGPU::FeatureDebuggerEmitPrologue,
    AMDGPU::FeatureDebuggerInsertNops,
    AMDGPU::FeatureDebuggerReserveRegs,

    // Properties of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,

    // Perf-tuning features.
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };

  const AMDGPUSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

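  // Instruction cost helpers: costs are expressed as multiples of the basic
  // TTI cost according to the throughput class of the instruction
  // (full/half/quarter rate) on the target.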
  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and on others
  // quarter rate. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(TM->getSubtargetImpl(F)),
      TLI(ST->getTargetLowering()),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

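  // Branch conditions can differ between the lanes of a wavefront, so passes
  // must consult divergence analysis instead of assuming uniform control flow.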
  bool hasBranchDivergence() { return true; }

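  // Unrolling preferences are tuned for GPU code in the .cpp implementation
  // (e.g. the threshold is raised for loops that address private/scratch
  // memory, so full unrolling can expose those accesses to SROA).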
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

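  // Register file and vector width information reported to the vectorizers.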
  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

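  // Legality queries used by the LoadStoreVectorizer when deciding whether a
  // chain of contiguous memory accesses may be combined into a wider access.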
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

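  // Cost model entry points; 64-bit and other reduced-throughput operations
  // are expected to be costed with the rate helpers above.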
  int getArithmeticInstrCost(
    unsigned Opcode, Type *Ty,
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
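
  // Queries used by divergence analysis to classify values as uniform or
  // divergent across the lanes of a wavefront.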
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return ST->hasFlatAddressSpace() ?
      ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
  }

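  // Splitting a vector is considered free: vector values are held in groups
  // of 32-bit registers, so the pieces are already directly addressable.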
  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

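  // Inlining is treated as compatible only when the caller provides every
  // subtarget feature the callee requires, ignoring the bits in
  // InlineFeatureIgnoreList above.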
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;
};

} // end namespace llvm

#endif