blob: 1a1bdb35458a11ad36fd1b73c385772c38944f66 [file] [log] [blame]
Tom Stellard8b1e0212013-07-27 00:01:07 +00001//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// \file
11// This file implements a TargetTransformInfo analysis pass specific to the
12// AMDGPU target machine. It uses the target's detailed information to provide
13// more precise answers to certain TTI queries, while letting the target
14// independent and default TTI implementations handle the rest.
15//
16//===----------------------------------------------------------------------===//
17
Tom Stellard8b1e0212013-07-27 00:01:07 +000018#include "AMDGPU.h"
19#include "AMDGPUTargetMachine.h"
Tom Stellard8cce9bd2014-01-23 18:49:28 +000020#include "llvm/Analysis/LoopInfo.h"
Tom Stellard8b1e0212013-07-27 00:01:07 +000021#include "llvm/Analysis/TargetTransformInfo.h"
Tom Stellard8cce9bd2014-01-23 18:49:28 +000022#include "llvm/Analysis/ValueTracking.h"
Tom Stellard8b1e0212013-07-27 00:01:07 +000023#include "llvm/Support/Debug.h"
Tom Stellard8b1e0212013-07-27 00:01:07 +000024#include "llvm/Target/CostTable.h"
Chandler Carruth8a8cd2b2014-01-07 11:48:04 +000025#include "llvm/Target/TargetLowering.h"
Tom Stellard8b1e0212013-07-27 00:01:07 +000026using namespace llvm;
27
Chandler Carruth84e68b22014-04-22 02:41:26 +000028#define DEBUG_TYPE "AMDGPUtti"
29
Tom Stellard8b1e0212013-07-27 00:01:07 +000030// Declare the pass initialization routine locally as target-specific passes
31// don't have a target-wide initialization entry point, and so we rely on the
32// pass constructor initialization.
33namespace llvm {
34void initializeAMDGPUTTIPass(PassRegistry &);
35}
36
37namespace {
38
Craig Topper77dfe452014-03-02 08:08:51 +000039class AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo {
Tom Stellard8b1e0212013-07-27 00:01:07 +000040 const AMDGPUTargetMachine *TM;
41 const AMDGPUSubtarget *ST;
42 const AMDGPUTargetLowering *TLI;
43
44 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
45 /// are set if the result needs to be inserted and/or extracted from vectors.
46 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
47
48public:
49 AMDGPUTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
50 llvm_unreachable("This pass cannot be directly constructed");
51 }
52
53 AMDGPUTTI(const AMDGPUTargetMachine *TM)
54 : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
55 TLI(TM->getTargetLowering()) {
56 initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
57 }
58
Craig Topper73156022014-03-02 09:09:27 +000059 virtual void initializePass() override { pushTTIStack(this); }
Tom Stellard8b1e0212013-07-27 00:01:07 +000060
Craig Topper73156022014-03-02 09:09:27 +000061 virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
Tom Stellard8b1e0212013-07-27 00:01:07 +000062 TargetTransformInfo::getAnalysisUsage(AU);
63 }
64
65 /// Pass identification.
66 static char ID;
67
68 /// Provide necessary pointer adjustments for the two base classes.
Craig Topper73156022014-03-02 09:09:27 +000069 virtual void *getAdjustedAnalysisPointer(const void *ID) override {
Tom Stellard8b1e0212013-07-27 00:01:07 +000070 if (ID == &TargetTransformInfo::ID)
71 return (TargetTransformInfo *)this;
72 return this;
73 }
74
Craig Topper73156022014-03-02 09:09:27 +000075 virtual bool hasBranchDivergence() const override;
Tom Stellard8b1e0212013-07-27 00:01:07 +000076
Tom Stellard8cce9bd2014-01-23 18:49:28 +000077 virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
78
Tom Stellard8b1e0212013-07-27 00:01:07 +000079 /// @}
80};
81
82} // end anonymous namespace
83
84INITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti",
85 "AMDGPU Target Transform Info", true, true, false)
86char AMDGPUTTI::ID = 0;
87
88ImmutablePass *
89llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
90 return new AMDGPUTTI(TM);
91}
92
93bool AMDGPUTTI::hasBranchDivergence() const { return true; }
Tom Stellard8cce9bd2014-01-23 18:49:28 +000094
95void AMDGPUTTI::getUnrollingPreferences(Loop *L,
96 UnrollingPreferences &UP) const {
97 for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
98 BI != BE; ++BI) {
99 BasicBlock *BB = *BI;
100 for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
101 I != E; ++I) {
102 const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I);
103 if (!GEP)
104 continue;
105 const Value *Ptr = GEP->getPointerOperand();
106 const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
107 if (Alloca) {
108 // We want to do whatever we can to limit the number of alloca
109 // instructions that make it through to the code generator. allocas
110 // require us to use indirect addressing, which is slow and prone to
111 // compiler bugs. If this loop does an address calculation on an
Tom Stellardfd0d86c2014-02-25 21:36:21 +0000112 // alloca ptr, then we want to use a higher than normal loop unroll
Matt Arsenault5e1e4312014-04-04 20:13:08 +0000113 // threshold. This will give SROA a better chance to eliminate these
114 // allocas.
115 //
116 // Don't use the maximum allowed value here as it will make some
117 // programs way too big.
Tom Stellardfd0d86c2014-02-25 21:36:21 +0000118 UP.Threshold = 500;
Tom Stellard8cce9bd2014-01-23 18:49:28 +0000119 }
120 }
121 }
122}