//===-- NVPTXFavorNonGenericAddrSpace.cpp - ---------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// When a load/store accesses the generic address space, this pass checks
// whether the address is cast from a non-generic address space. If so, it
// removes the addrspacecast, because accessing non-generic address spaces is
// typically faster. Besides removing addrspacecasts directly used by
// loads/stores, this optimization also recursively traces into a GEP's pointer
// operand and a bitcast's source to find more eliminable addrspacecasts.
//
// For instance, the code below loads a float from an array allocated in
// addrspace(3).
//
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float]* %0, i64 0, i64 %i
//   %2 = bitcast float* %1 to i32*
//   %3 = load i32* %2 ; emits ld.u32
//
// First, function hoistAddrSpaceCastFrom reorders the addrspacecast, the GEP,
// and the bitcast to expose more optimization opportunities to function
// optimizeMemoryInstruction. The intermediate code looks like:
//
//   %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
//   %2 = addrspacecast i32 addrspace(3)* %1 to i32*
//   %3 = load i32* %2 ; still emits ld.u32, but will be optimized shortly
//
// Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed
// generic pointers, and folds the load and the addrspacecast into a load from
// the original address space. The final code looks like:
//
//   %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
//   %3 = load i32 addrspace(3)* %1 ; emits ld.shared.u32
//
// This pass may remove an addrspacecast in a different BB. Therefore, we
// implement it as a FunctionPass.
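//
// For example, the addrspacecast to shortcut and the load that uses it may
// live in different basic blocks (the IR below is illustrative):
//
//   entry:
//     %generic = addrspacecast float addrspace(3)* %input to float*
//     br label %body
//   body:
//     %v = load float* %generic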
//
// TODO:
// The current implementation doesn't handle PHINodes. Eliminating
// addrspacecasts used by PHINodes is trickier because PHINodes can introduce
// loops in data flow. For example,
//
//   %generic.input = addrspacecast float addrspace(3)* %input to float*
// loop:
//   %y = phi [ %generic.input, %y2 ]
//   %y2 = getelementptr %y, 1
//   %v = load %y2
//   br ..., label %loop, ...
//
// Marking %y2 shared depends on marking %y shared, but %y also data-flow
// depends on %y2. We probably need an iterative fix-point algorithm to handle
// this case.
//
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

// An option to disable this optimization. It is enabled by default.
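// For example, passing -disable-nvptx-favor-non-generic to llc leaves all
// generic-address-space accesses untouched, which can help determine whether
// a miscompile or a performance difference is attributable to this pass.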
static cl::opt<bool> DisableFavorNonGeneric(
    "disable-nvptx-favor-non-generic",
    cl::init(false),
    cl::desc("Do not convert generic address space usage "
             "to non-generic address space usage"),
    cl::Hidden);

namespace {
/// \brief NVPTXFavorNonGenericAddrSpaces
class NVPTXFavorNonGenericAddrSpaces : public FunctionPass {
public:
  static char ID;
  NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override;

private:
  /// Optimizes load/store instructions. Idx is the index of the pointer
  /// operand (0 for load, and 1 for store). Returns true if it changes
  /// anything.
  bool optimizeMemoryInstruction(Instruction *I, unsigned Idx);
  /// Recursively traces into a GEP's pointer operand or a bitcast's source to
  /// find an eliminable addrspacecast, and hoists that addrspacecast to the
  /// outermost level. For example, this function transforms
  ///   bitcast(gep(gep(addrspacecast(X))))
  /// to
  ///   addrspacecast(bitcast(gep(gep(X)))).
  ///
  /// This reordering exposes to optimizeMemoryInstruction more
  /// optimization opportunities on loads and stores.
  ///
  /// If this function successfully hoists an eliminable addrspacecast or V is
  /// already such an addrspacecast, it returns the transformed value (which is
  /// guaranteed to be an addrspacecast); otherwise, it returns nullptr.
  Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0);
  /// Helper function for GEPs.
  Value *hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth);
  /// Helper function for bitcasts.
  Value *hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth);
};
}

char NVPTXFavorNonGenericAddrSpaces::ID = 0;

namespace llvm {
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
}
INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic",
                "Remove unnecessary non-generic-to-generic addrspacecasts",
                false, false)
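// Being registered here gives the pass the command-line name
// "nvptx-favor-non-generic", so it can also be run in isolation, e.g. with
// "opt -nvptx-favor-non-generic -S" (assuming an opt binary that links in the
// NVPTX backend and initializes its passes).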

// Decides whether V is an addrspacecast and whether shortcutting V in
// load/store is valid and beneficial.
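//
// For example, the cast below is eliminable: it goes from addrspace(3)
// (shared) to the generic address space and keeps the pointee type:
//   %p = addrspacecast float addrspace(3)* %x to float*
// A generic-to-non-generic cast, or a cast that also changes the pointee type,
// is not considered eliminable here.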
static bool isEliminableAddrSpaceCast(Value *V) {
  // Returns false if V is not even an addrspacecast.
  Operator *Cast = dyn_cast<Operator>(V);
  if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast)
    return false;

  Value *Src = Cast->getOperand(0);
  PointerType *SrcTy = cast<PointerType>(Src->getType());
  PointerType *DestTy = cast<PointerType>(Cast->getType());
  // TODO: For now, we only handle the case where the addrspacecast only
  // changes the address space but not the type. If the type also changes, we
  // could still get rid of the addrspacecast by adding an extra bitcast, but
  // we rarely see such scenarios.
  if (SrcTy->getElementType() != DestTy->getElementType())
    return false;

  // Checks whether the addrspacecast is from a non-generic address space to
  // the generic address space.
  return (SrcTy->getAddressSpace() != AddressSpace::ADDRESS_SPACE_GENERIC &&
          DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC);
}

Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
    GEPOperator *GEP, int Depth) {
  Value *NewOperand =
      hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1);
  if (NewOperand == nullptr)
    return nullptr;

  // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
  assert(isEliminableAddrSpaceCast(NewOperand));
  Operator *Cast = cast<Operator>(NewOperand);

  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  Value *NewASC;
  if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) {
    // GEP = gep (addrspacecast X), indices
    // =>
    // NewGEP = gep X, indices
    // NewASC = addrspacecast NewGEP
    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
        GEP->getSourceElementType(), Cast->getOperand(0), Indices,
        "", GEPI);
    NewGEP->setIsInBounds(GEP->isInBounds());
    NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI);
    NewASC->takeName(GEP);
    // Without RAUWing GEP, the compiler would visit GEP again and emit
    // redundant instructions. This is exercised in test @rauw in
    // access-non-generic.ll.
    GEP->replaceAllUsesWith(NewASC);
  } else {
    // GEP is a constant expression.
    Constant *NewGEP = ConstantExpr::getGetElementPtr(
        GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
        Indices, GEP->isInBounds());
    NewASC = ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType());
  }
  return NewASC;
}

Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast(
    BitCastOperator *BC, int Depth) {
  Value *NewOperand = hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1);
  if (NewOperand == nullptr)
    return nullptr;

  // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
  assert(isEliminableAddrSpaceCast(NewOperand));
  Operator *Cast = cast<Operator>(NewOperand);

  // Cast = addrspacecast Src
  // BC = bitcast Cast
  // =>
  // Cast' = bitcast Src
  // BC' = addrspacecast Cast'
  Value *Src = Cast->getOperand(0);
  Type *TypeOfNewCast =
      PointerType::get(BC->getType()->getPointerElementType(),
                       Src->getType()->getPointerAddressSpace());
  Value *NewBC;
  if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) {
    Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI);
    NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI);
    NewBC->takeName(BC);
    // Without RAUWing BC, the compiler would visit BC again and emit
    // redundant instructions. This is exercised in test @rauw in
    // access-non-generic.ll.
    BC->replaceAllUsesWith(NewBC);
  } else {
    // BC is a constant expression.
    Constant *NewCast =
        ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast);
    NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType());
  }
  return NewBC;
}

Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V,
                                                              int Depth) {
  // Returns V if V is already an eliminable addrspacecast.
  if (isEliminableAddrSpaceCast(V))
    return V;

  // Limit the depth to prevent this recursive function from running too long.
  const int MaxDepth = 20;
  if (Depth >= MaxDepth)
    return nullptr;

  // If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer
  // operand. This enables optimizeMemoryInstruction to shortcut addrspacecasts
  // that are not directly used by the load/store.
  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
    return hoistAddrSpaceCastFromGEP(GEP, Depth);

  if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V))
    return hoistAddrSpaceCastFromBitCast(BC, Depth);

  return nullptr;
}

bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
                                                               unsigned Idx) {
  Value *NewOperand = hoistAddrSpaceCastFrom(MI->getOperand(Idx));
  if (NewOperand == nullptr)
    return false;

  // load/store (addrspacecast X) => load/store X if shortcutting the
  // addrspacecast is valid and can improve performance.
  //
  // e.g.,
  //   %1 = addrspacecast float addrspace(3)* %0 to float*
  //   %2 = load float* %1
  // ->
  //   %2 = load float addrspace(3)* %0
  //
  // Note: the addrspacecast can also be a constant expression.
  assert(isEliminableAddrSpaceCast(NewOperand));
  Operator *ASC = cast<Operator>(NewOperand);
  MI->setOperand(Idx, ASC->getOperand(0));
  return true;
}

bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {
  if (DisableFavorNonGeneric)
    return false;

  bool Changed = false;
  for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
    for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ++I) {
      if (isa<LoadInst>(I)) {
        // V = load P
        Changed |= optimizeMemoryInstruction(I, 0);
      } else if (isa<StoreInst>(I)) {
        // store V, P
        Changed |= optimizeMemoryInstruction(I, 1);
      }
    }
  }
  return Changed;
}

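// Factory function, declared in NVPTX.h, that the NVPTX backend uses to
// create this pass.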
FunctionPass *llvm::createNVPTXFavorNonGenericAddrSpacesPass() {
  return new NVPTXFavorNonGenericAddrSpaces();
}