| Eugene Zelenko | d16eff8 | 2017-08-08 23:53:55 +0000 | [diff] [blame] | 1 | //===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===// | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 2 | // | 
|  | 3 | //                     The LLVM Compiler Infrastructure | 
|  | 4 | // | 
|  | 5 | // This file is distributed under the University of Illinois Open Source | 
|  | 6 | // License. See LICENSE.TXT for details. | 
|  | 7 | // | 
|  | 8 | //===----------------------------------------------------------------------===// | 
| Eugene Zelenko | d16eff8 | 2017-08-08 23:53:55 +0000 | [diff] [blame] | 9 | // | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 10 | /// \file | 
|  | 11 | /// This file declares a TargetTransformInfo::Concept conforming object specific to the | 
|  | 12 | /// AMDGPU target machine. It uses the target's detailed information to | 
|  | 13 | /// provide more precise answers to certain TTI queries, while letting the | 
|  | 14 | /// target independent and default TTI implementations handle the rest. | 
| Eugene Zelenko | d16eff8 | 2017-08-08 23:53:55 +0000 | [diff] [blame] | 15 | // | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 16 | //===----------------------------------------------------------------------===// | 
|  | 17 |  | 
| Matt Arsenault | 6b6a2c3 | 2016-03-11 08:00:27 +0000 | [diff] [blame] | 18 | #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H | 
|  | 19 | #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 20 |  | 
|  | 21 | #include "AMDGPU.h" | 
| Eugene Zelenko | d16eff8 | 2017-08-08 23:53:55 +0000 | [diff] [blame] | 22 | #include "AMDGPUSubtarget.h" | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 23 | #include "AMDGPUTargetMachine.h" | 
| Eugene Zelenko | d16eff8 | 2017-08-08 23:53:55 +0000 | [diff] [blame] | 24 | #include "Utils/AMDGPUBaseInfo.h" | 
|  | 25 | #include "llvm/ADT/ArrayRef.h" | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 26 | #include "llvm/Analysis/TargetTransformInfo.h" | 
|  | 27 | #include "llvm/CodeGen/BasicTTIImpl.h" | 
| Eugene Zelenko | d16eff8 | 2017-08-08 23:53:55 +0000 | [diff] [blame] | 28 | #include "llvm/IR/Function.h" | 
|  | 29 | #include "llvm/MC/SubtargetFeature.h" | 
|  | 30 | #include "llvm/Support/MathExtras.h" | 
|  | 31 | #include <cassert> | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 32 |  | 
|  | 33 | namespace llvm { | 
| Eugene Zelenko | d16eff8 | 2017-08-08 23:53:55 +0000 | [diff] [blame] | 34 |  | 
| Matt Arsenault | 9651813 | 2016-03-25 01:00:32 +0000 | [diff] [blame] | 35 | class AMDGPUTargetLowering; | 
| Eugene Zelenko | d16eff8 | 2017-08-08 23:53:55 +0000 | [diff] [blame] | 36 | class Loop; | 
|  | 37 | class ScalarEvolution; | 
|  | 38 | class Type; | 
|  | 39 | class Value; | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 40 |  | 
| Matt Arsenault | 6b6a2c3 | 2016-03-11 08:00:27 +0000 | [diff] [blame] | 41 | class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> { | 
| Eugene Zelenko | d16eff8 | 2017-08-08 23:53:55 +0000 | [diff] [blame] | 42 | using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>; | 
|  | 43 | using TTI = TargetTransformInfo; | 
|  | 44 |  | 
| Chandler Carruth | c340ca8 | 2015-02-01 14:01:15 +0000 | [diff] [blame] | 45 | friend BaseT; | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 46 |  | 
|  | 47 | const AMDGPUSubtarget *ST; | 
| Chandler Carruth | c340ca8 | 2015-02-01 14:01:15 +0000 | [diff] [blame] | 48 | const AMDGPUTargetLowering *TLI; | 
| Matt Arsenault | b6491cc | 2017-01-31 01:20:54 +0000 | [diff] [blame] | 49 | bool IsGraphicsShader; | 
| Chandler Carruth | c340ca8 | 2015-02-01 14:01:15 +0000 | [diff] [blame] | 50 |  | 
| Matt Arsenault | aac47c1 | 2017-08-07 17:08:44 +0000 | [diff] [blame] | 51 | const FeatureBitset InlineFeatureIgnoreList = { | 
|  | 52 | // Codegen control options which don't matter. | 
|  | 53 | AMDGPU::FeatureEnableLoadStoreOpt, | 
|  | 54 | AMDGPU::FeatureEnableSIScheduler, | 
|  | 55 | AMDGPU::FeatureEnableUnsafeDSOffsetFolding, | 
|  | 56 | AMDGPU::FeatureFlatForGlobal, | 
|  | 57 | AMDGPU::FeaturePromoteAlloca, | 
|  | 58 | AMDGPU::FeatureUnalignedBufferAccess, | 
|  | 59 | AMDGPU::FeatureUnalignedScratchAccess, | 
|  | 60 |  | 
|  | 61 | AMDGPU::FeatureAutoWaitcntBeforeBarrier, | 
|  | 62 | AMDGPU::FeatureDebuggerEmitPrologue, | 
|  | 63 | AMDGPU::FeatureDebuggerInsertNops, | 
|  | 64 | AMDGPU::FeatureDebuggerReserveRegs, | 
|  | 65 |  | 
|  | 66 | // Property of the kernel/environment which can't actually differ. | 
|  | 67 | AMDGPU::FeatureSGPRInitBug, | 
|  | 68 | AMDGPU::FeatureXNACK, | 
|  | 69 | AMDGPU::FeatureTrapHandler, | 
|  | 70 |  | 
|  | 71 | // Perf-tuning features | 
|  | 72 | AMDGPU::FeatureFastFMAF32, | 
|  | 73 | AMDGPU::HalfRate64Ops | 
|  | 74 | }; | 
|  | 75 |  | 
| Chandler Carruth | c956ab66 | 2015-02-01 14:22:17 +0000 | [diff] [blame] | 76 | const AMDGPUSubtarget *getST() const { return ST; } | 
| Chandler Carruth | c340ca8 | 2015-02-01 14:01:15 +0000 | [diff] [blame] | 77 | const AMDGPUTargetLowering *getTLI() const { return TLI; } | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 78 |  | 
| Matt Arsenault | 9651813 | 2016-03-25 01:00:32 +0000 | [diff] [blame] | 79 | static inline int getFullRateInstrCost() { | 
|  | 80 | return TargetTransformInfo::TCC_Basic; | 
|  | 81 | } | 
|  | 82 |  | 
|  | 83 | static inline int getHalfRateInstrCost() { | 
|  | 84 | return 2 * TargetTransformInfo::TCC_Basic; | 
|  | 85 | } | 
|  | 86 |  | 
|  | 87 | // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe | 
|  | 88 | // should be 2 or 4. | 
|  | 89 | static inline int getQuarterRateInstrCost() { | 
|  | 90 | return 3 * TargetTransformInfo::TCC_Basic; | 
|  | 91 | } | 
|  | 92 |  | 
|  | 93 | // On some parts, normal fp64 operations are half rate, and others | 
|  | 94 | // quarter. This also applies to some integer operations. | 
|  | 95 | inline int get64BitInstrCost() const { | 
|  | 96 | return ST->hasHalfRate64Ops() ? | 
|  | 97 | getHalfRateInstrCost() : getQuarterRateInstrCost(); | 
|  | 98 | } | 
|  | 99 |  | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 100 | public: | 
| Matt Arsenault | 59c0ffa | 2016-06-27 20:48:03 +0000 | [diff] [blame] | 101 | explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F) | 
|  | 102 | : BaseT(TM, F.getParent()->getDataLayout()), | 
|  | 103 | ST(TM->getSubtargetImpl(F)), | 
| Matt Arsenault | b6491cc | 2017-01-31 01:20:54 +0000 | [diff] [blame] | 104 | TLI(ST->getTargetLowering()), | 
|  | 105 | IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {} | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 106 |  | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 107 | bool hasBranchDivergence() { return true; } | 
|  | 108 |  | 
| Geoff Berry | 66d9bdb | 2017-06-28 15:53:17 +0000 | [diff] [blame] | 109 | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, | 
|  | 110 | TTI::UnrollingPreferences &UP); | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 111 |  | 
|  | 112 | TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) { | 
|  | 113 | assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); | 
| Matt Arsenault | 1735da4 | 2016-05-18 16:10:19 +0000 | [diff] [blame] | 114 | return TTI::PSK_FastHardware; | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 115 | } | 
|  | 116 |  | 
| Matt Arsenault | 67cd347 | 2017-06-20 20:38:06 +0000 | [diff] [blame] | 117 | unsigned getHardwareNumberOfRegisters(bool Vector) const; | 
|  | 118 | unsigned getNumberOfRegisters(bool Vector) const; | 
| Eugene Zelenko | d16eff8 | 2017-08-08 23:53:55 +0000 | [diff] [blame] | 119 | unsigned getRegisterBitWidth(bool Vector) const; | 
| Matt Arsenault | 67cd347 | 2017-06-20 20:38:06 +0000 | [diff] [blame] | 120 | unsigned getMinVectorRegisterBitWidth() const; | 
| Volkan Keles | 1c38681 | 2016-10-03 10:31:34 +0000 | [diff] [blame] | 121 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; | 
| Matt Arsenault | f0a88db | 2017-02-23 03:58:53 +0000 | [diff] [blame] | 122 |  | 
|  | 123 | bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, | 
|  | 124 | unsigned Alignment, | 
|  | 125 | unsigned AddrSpace) const; | 
|  | 126 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, | 
|  | 127 | unsigned Alignment, | 
|  | 128 | unsigned AddrSpace) const; | 
|  | 129 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, | 
|  | 130 | unsigned Alignment, | 
|  | 131 | unsigned AddrSpace) const; | 
|  | 132 |  | 
| Wei Mi | 062c744 | 2015-05-06 17:12:25 +0000 | [diff] [blame] | 133 | unsigned getMaxInterleaveFactor(unsigned VF); | 
| Matt Arsenault | e830f54 | 2015-12-01 19:08:39 +0000 | [diff] [blame] | 134 |  | 
| Matt Arsenault | 9651813 | 2016-03-25 01:00:32 +0000 | [diff] [blame] | 135 | int getArithmeticInstrCost( | 
|  | 136 | unsigned Opcode, Type *Ty, | 
|  | 137 | TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, | 
|  | 138 | TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, | 
|  | 139 | TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, | 
| Mohammed Agabaria | 2c96c43 | 2017-01-11 08:23:37 +0000 | [diff] [blame] | 140 | TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, | 
|  | 141 | ArrayRef<const Value *> Args = ArrayRef<const Value *>()); | 
| Matt Arsenault | 9651813 | 2016-03-25 01:00:32 +0000 | [diff] [blame] | 142 |  | 
| Matt Arsenault | e05ff15 | 2015-12-16 18:37:19 +0000 | [diff] [blame] | 143 | unsigned getCFInstrCost(unsigned Opcode); | 
|  | 144 |  | 
| Matt Arsenault | e830f54 | 2015-12-01 19:08:39 +0000 | [diff] [blame] | 145 | int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); | 
| Tom Stellard | dbe374b | 2015-12-15 18:04:38 +0000 | [diff] [blame] | 146 | bool isSourceOfDivergence(const Value *V) const; | 
| Alexander Timofeev | 0f9c84c | 2017-06-15 19:33:10 +0000 | [diff] [blame] | 147 | bool isAlwaysUniform(const Value *V) const; | 
| Michael Kuperstein | aa71bdd | 2016-07-06 17:30:56 +0000 | [diff] [blame] | 148 |  | 
| Matt Arsenault | b6491cc | 2017-01-31 01:20:54 +0000 | [diff] [blame] | 149 | unsigned getFlatAddressSpace() const { | 
|  | 150 | // Don't bother running InferAddressSpaces pass on graphics shaders which | 
|  | 151 | // don't use flat addressing. | 
|  | 152 | if (IsGraphicsShader) | 
|  | 153 | return -1; | 
| Matt Arsenault | 1575cb8 | 2017-01-31 23:48:37 +0000 | [diff] [blame] | 154 | return ST->hasFlatAddressSpace() ? | 
| Yaxun Liu | 1a14bfa | 2017-03-27 14:04:01 +0000 | [diff] [blame] | 155 | ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE; | 
| Matt Arsenault | b6491cc | 2017-01-31 01:20:54 +0000 | [diff] [blame] | 156 | } | 
|  | 157 |  | 
| Michael Kuperstein | aa71bdd | 2016-07-06 17:30:56 +0000 | [diff] [blame] | 158 | unsigned getVectorSplitCost() { return 0; } | 
| Matt Arsenault | 3c5e423 | 2017-05-10 21:29:33 +0000 | [diff] [blame] | 159 |  | 
|  | 160 | unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, | 
|  | 161 | Type *SubTp); | 
| Matt Arsenault | aac47c1 | 2017-08-07 17:08:44 +0000 | [diff] [blame] | 162 |  | 
|  | 163 | bool areInlineCompatible(const Function *Caller, | 
|  | 164 | const Function *Callee) const; | 
| Stanislav Mekhanoshin | 5670e6d | 2017-09-20 04:25:58 +0000 | [diff] [blame] | 165 |  | 
|  | 166 | unsigned getInliningThresholdMultiplier() { return 9; } | 
| Chandler Carruth | 93dcdc4 | 2015-01-31 11:17:59 +0000 | [diff] [blame] | 167 | }; | 
|  | 168 |  | 
|  | 169 | } // end namespace llvm | 
|  | 170 |  | 
| Eugene Zelenko | d16eff8 | 2017-08-08 23:53:55 +0000 | [diff] [blame] | 171 | #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H |