blob: 2d934a4cb099b38de81b88a75b302a37c33af89e [file] [log] [blame]
//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
/// AMDGPU target machine. It uses the target's detailed information to provide
/// more precise answers to certain TTI queries, while letting the target
/// independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//
17
Tom Stellard8b1e0212013-07-27 00:01:07 +000018#include "AMDGPU.h"
19#include "AMDGPUTargetMachine.h"
Tom Stellard8cce9bd2014-01-23 18:49:28 +000020#include "llvm/Analysis/LoopInfo.h"
Tom Stellard8b1e0212013-07-27 00:01:07 +000021#include "llvm/Analysis/TargetTransformInfo.h"
Tom Stellard8cce9bd2014-01-23 18:49:28 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellard8b1e0212013-07-27 00:01:07 +000023#include "llvm/Support/Debug.h"
Tom Stellard8b1e0212013-07-27 00:01:07 +000024#include "llvm/Target/CostTable.h"
Chandler Carruth8a8cd2b2014-01-07 11:48:04 +000025#include "llvm/Target/TargetLowering.h"
Tom Stellard8b1e0212013-07-27 00:01:07 +000026using namespace llvm;
27
Chandler Carruth84e68b22014-04-22 02:41:26 +000028#define DEBUG_TYPE "AMDGPUtti"
29
Tom Stellard8b1e0212013-07-27 00:01:07 +000030// Declare the pass initialization routine locally as target-specific passes
31// don't have a target-wide initialization entry point, and so we rely on the
32// pass constructor initialization.
33namespace llvm {
34void initializeAMDGPUTTIPass(PassRegistry &);
35}
36
37namespace {
38
Craig Topper77dfe452014-03-02 08:08:51 +000039class AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo {
Tom Stellard8b1e0212013-07-27 00:01:07 +000040 const AMDGPUTargetMachine *TM;
41 const AMDGPUSubtarget *ST;
42 const AMDGPUTargetLowering *TLI;
43
44 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
45 /// are set if the result needs to be inserted and/or extracted from vectors.
46 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
47
48public:
Craig Topper062a2ba2014-04-25 05:30:21 +000049 AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
Tom Stellard8b1e0212013-07-27 00:01:07 +000050 llvm_unreachable("This pass cannot be directly constructed");
51 }
52
53 AMDGPUTTI(const AMDGPUTargetMachine *TM)
54 : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
55 TLI(TM->getTargetLowering()) {
56 initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
57 }
58
Craig Topper5656db42014-04-29 07:57:24 +000059 void initializePass() override { pushTTIStack(this); }
Tom Stellard8b1e0212013-07-27 00:01:07 +000060
Craig Topper5656db42014-04-29 07:57:24 +000061 void getAnalysisUsage(AnalysisUsage &AU) const override {
Tom Stellard8b1e0212013-07-27 00:01:07 +000062 TargetTransformInfo::getAnalysisUsage(AU);
63 }
64
65 /// Pass identification.
66 static char ID;
67
68 /// Provide necessary pointer adjustments for the two base classes.
Craig Topper5656db42014-04-29 07:57:24 +000069 void *getAdjustedAnalysisPointer(const void *ID) override {
Tom Stellard8b1e0212013-07-27 00:01:07 +000070 if (ID == &TargetTransformInfo::ID)
71 return (TargetTransformInfo *)this;
72 return this;
73 }
74
Craig Topper5656db42014-04-29 07:57:24 +000075 bool hasBranchDivergence() const override;
Tom Stellard8b1e0212013-07-27 00:01:07 +000076
Craig Topper5656db42014-04-29 07:57:24 +000077 void getUnrollingPreferences(Loop *L,
78 UnrollingPreferences &UP) const override;
Tom Stellard8cce9bd2014-01-23 18:49:28 +000079
Tom Stellard8b1e0212013-07-27 00:01:07 +000080 /// @}
81};
82
83} // end anonymous namespace
84
85INITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti",
86 "AMDGPU Target Transform Info", true, true, false)
87char AMDGPUTTI::ID = 0;
88
89ImmutablePass *
90llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
91 return new AMDGPUTTI(TM);
92}
93
94bool AMDGPUTTI::hasBranchDivergence() const { return true; }
Tom Stellard8cce9bd2014-01-23 18:49:28 +000095
96void AMDGPUTTI::getUnrollingPreferences(Loop *L,
97 UnrollingPreferences &UP) const {
Matt Arsenaultac6e39c2014-07-17 06:19:06 +000098 for (const BasicBlock *BB : L->getBlocks()) {
99 for (const Instruction &I : *BB) {
100 const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
Matt Arsenault5e2b0f52014-07-17 06:13:41 +0000101 if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
Tom Stellard8cce9bd2014-01-23 18:49:28 +0000102 continue;
Matt Arsenaultac6e39c2014-07-17 06:19:06 +0000103
Tom Stellard8cce9bd2014-01-23 18:49:28 +0000104 const Value *Ptr = GEP->getPointerOperand();
105 const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
106 if (Alloca) {
107 // We want to do whatever we can to limit the number of alloca
108 // instructions that make it through to the code generator. allocas
109 // require us to use indirect addressing, which is slow and prone to
110 // compiler bugs. If this loop does an address calculation on an
Tom Stellardfd0d86c2014-02-25 21:36:21 +0000111 // alloca ptr, then we want to use a higher than normal loop unroll
Matt Arsenault5e1e4312014-04-04 20:13:08 +0000112 // threshold. This will give SROA a better chance to eliminate these
113 // allocas.
114 //
115 // Don't use the maximum allowed value here as it will make some
116 // programs way too big.
Tom Stellardfd0d86c2014-02-25 21:36:21 +0000117 UP.Threshold = 500;
Tom Stellard8cce9bd2014-01-23 18:49:28 +0000118 }
119 }
120 }
121}