//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// AMDGPU target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the target
// independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//
17
Tom Stellard8b1e0212013-07-27 00:01:07 +000018#include "AMDGPU.h"
19#include "AMDGPUTargetMachine.h"
Tom Stellard8cce9bd2014-01-23 18:49:28 +000020#include "llvm/Analysis/LoopInfo.h"
Tom Stellard8b1e0212013-07-27 00:01:07 +000021#include "llvm/Analysis/TargetTransformInfo.h"
Tom Stellard8cce9bd2014-01-23 18:49:28 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellard8b1e0212013-07-27 00:01:07 +000023#include "llvm/Support/Debug.h"
Tom Stellard8b1e0212013-07-27 00:01:07 +000024#include "llvm/Target/CostTable.h"
Chandler Carruth8a8cd2b2014-01-07 11:48:04 +000025#include "llvm/Target/TargetLowering.h"
Tom Stellard8b1e0212013-07-27 00:01:07 +000026using namespace llvm;
27
Chandler Carruth84e68b22014-04-22 02:41:26 +000028#define DEBUG_TYPE "AMDGPUtti"
29
Tom Stellard8b1e0212013-07-27 00:01:07 +000030// Declare the pass initialization routine locally as target-specific passes
31// don't have a target-wide initialization entry point, and so we rely on the
32// pass constructor initialization.
33namespace llvm {
34void initializeAMDGPUTTIPass(PassRegistry &);
35}
36
37namespace {
38
Craig Topper77dfe452014-03-02 08:08:51 +000039class AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo {
Tom Stellard8b1e0212013-07-27 00:01:07 +000040 const AMDGPUTargetMachine *TM;
41 const AMDGPUSubtarget *ST;
42 const AMDGPUTargetLowering *TLI;
43
44 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
45 /// are set if the result needs to be inserted and/or extracted from vectors.
46 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
47
48public:
Craig Topper062a2ba2014-04-25 05:30:21 +000049 AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
Tom Stellard8b1e0212013-07-27 00:01:07 +000050 llvm_unreachable("This pass cannot be directly constructed");
51 }
52
53 AMDGPUTTI(const AMDGPUTargetMachine *TM)
54 : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
Eric Christopherd9134482014-08-04 21:25:23 +000055 TLI(TM->getSubtargetImpl()->getTargetLowering()) {
Tom Stellard8b1e0212013-07-27 00:01:07 +000056 initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
57 }
58
Craig Topper5656db42014-04-29 07:57:24 +000059 void initializePass() override { pushTTIStack(this); }
Tom Stellard8b1e0212013-07-27 00:01:07 +000060
Craig Topper5656db42014-04-29 07:57:24 +000061 void getAnalysisUsage(AnalysisUsage &AU) const override {
Tom Stellard8b1e0212013-07-27 00:01:07 +000062 TargetTransformInfo::getAnalysisUsage(AU);
63 }
64
65 /// Pass identification.
66 static char ID;
67
68 /// Provide necessary pointer adjustments for the two base classes.
Craig Topper5656db42014-04-29 07:57:24 +000069 void *getAdjustedAnalysisPointer(const void *ID) override {
Tom Stellard8b1e0212013-07-27 00:01:07 +000070 if (ID == &TargetTransformInfo::ID)
71 return (TargetTransformInfo *)this;
72 return this;
73 }
74
Craig Topper5656db42014-04-29 07:57:24 +000075 bool hasBranchDivergence() const override;
Tom Stellard8b1e0212013-07-27 00:01:07 +000076
Eric Christopherd85ffb12014-09-18 00:34:14 +000077 void getUnrollingPreferences(const Function *F, Loop *L,
Craig Topper5656db42014-04-29 07:57:24 +000078 UnrollingPreferences &UP) const override;
Tom Stellard8cce9bd2014-01-23 18:49:28 +000079
Matt Arsenault3dd43fc2014-07-18 06:07:13 +000080 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override;
81
Matt Arsenaulta93441f2014-07-19 18:15:16 +000082 unsigned getNumberOfRegisters(bool Vector) const override;
83 unsigned getRegisterBitWidth(bool Vector) const override;
Sanjay Patelb653de12014-09-10 17:58:16 +000084 unsigned getMaxInterleaveFactor() const override;
Matt Arsenaulta93441f2014-07-19 18:15:16 +000085
Tom Stellard8b1e0212013-07-27 00:01:07 +000086 /// @}
87};
88
89} // end anonymous namespace
90
91INITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti",
92 "AMDGPU Target Transform Info", true, true, false)
93char AMDGPUTTI::ID = 0;
94
95ImmutablePass *
96llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
97 return new AMDGPUTTI(TM);
98}
99
100bool AMDGPUTTI::hasBranchDivergence() const { return true; }
Tom Stellard8cce9bd2014-01-23 18:49:28 +0000101
Eric Christopherd85ffb12014-09-18 00:34:14 +0000102void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L,
Tom Stellard8cce9bd2014-01-23 18:49:28 +0000103 UnrollingPreferences &UP) const {
Matt Arsenaultc8244582014-07-25 23:02:42 +0000104 UP.Threshold = 300; // Twice the default.
105 UP.Count = UINT_MAX;
106 UP.Partial = true;
107
108 // TODO: Do we want runtime unrolling?
109
Matt Arsenaultac6e39c2014-07-17 06:19:06 +0000110 for (const BasicBlock *BB : L->getBlocks()) {
111 for (const Instruction &I : *BB) {
112 const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
Matt Arsenault5e2b0f52014-07-17 06:13:41 +0000113 if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
Tom Stellard8cce9bd2014-01-23 18:49:28 +0000114 continue;
Matt Arsenaultac6e39c2014-07-17 06:19:06 +0000115
Tom Stellard8cce9bd2014-01-23 18:49:28 +0000116 const Value *Ptr = GEP->getPointerOperand();
117 const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
118 if (Alloca) {
119 // We want to do whatever we can to limit the number of alloca
120 // instructions that make it through to the code generator. allocas
121 // require us to use indirect addressing, which is slow and prone to
122 // compiler bugs. If this loop does an address calculation on an
Tom Stellardfd0d86c2014-02-25 21:36:21 +0000123 // alloca ptr, then we want to use a higher than normal loop unroll
Matt Arsenault5e1e4312014-04-04 20:13:08 +0000124 // threshold. This will give SROA a better chance to eliminate these
125 // allocas.
126 //
127 // Don't use the maximum allowed value here as it will make some
128 // programs way too big.
Matt Arsenaultc8244582014-07-25 23:02:42 +0000129 UP.Threshold = 800;
Tom Stellard8cce9bd2014-01-23 18:49:28 +0000130 }
131 }
132 }
133}
Matt Arsenault3dd43fc2014-07-18 06:07:13 +0000134
135AMDGPUTTI::PopcntSupportKind
136AMDGPUTTI::getPopcntSupport(unsigned TyWidth) const {
137 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
138 return ST->hasBCNT(TyWidth) ? PSK_FastHardware : PSK_Software;
139}
Matt Arsenaulta93441f2014-07-19 18:15:16 +0000140
141unsigned AMDGPUTTI::getNumberOfRegisters(bool Vec) const {
142 if (Vec)
143 return 0;
144
145 // Number of VGPRs on SI.
146 if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
147 return 256;
148
149 return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
150}
151
152unsigned AMDGPUTTI::getRegisterBitWidth(bool) const {
153 return 32;
154}
155
Sanjay Patelb653de12014-09-10 17:58:16 +0000156unsigned AMDGPUTTI::getMaxInterleaveFactor() const {
Matt Arsenaulta93441f2014-07-19 18:15:16 +0000157 // Semi-arbitrary large amount.
158 return 64;
159}