//===-- NVPTXInferAddressSpaces.cpp - --------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// CUDA C/C++ includes memory space designation as variable type qualifiers
// (such as __global__ and __shared__). Knowing the space of a memory access
// allows CUDA compilers to emit faster PTX loads and stores. For example, a
// load from shared memory can be translated to `ld.shared`, which is roughly
// 10% faster than a generic `ld` on an NVIDIA Tesla K40c.
//
// Unfortunately, type qualifiers only apply to variable declarations, so CUDA
// compilers must infer the memory space of an address expression from
// type-qualified variables.
//
// LLVM IR uses non-zero (so-called specific) address spaces to represent memory
// spaces (e.g. addrspace(3) means shared memory). The Clang frontend
// places only type-qualified variables in specific address spaces, and then
// conservatively `addrspacecast`s each type-qualified variable to addrspace(0)
// (the so-called generic address space) for other instructions to use.
//
// For example, Clang translates the following CUDA code
//   __shared__ float a[10];
//   float v = a[i];
// to
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
//   %v = load float, float* %1 ; emits ld.f32
// @a is in addrspace(3) since it's type-qualified, but its use from %1 is
// redirected to %0 (the generic version of @a).
//
// The optimization implemented in this file propagates specific address spaces
// from type-qualified variable declarations to their users. For example, it
// optimizes the above IR to
//   %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %v = load float addrspace(3)* %1 ; emits ld.shared.f32
// propagating the addrspace(3) from @a to %1. As a result, the NVPTX
// codegen is able to emit ld.shared.f32 for %v.
//
// Address space inference works in two steps. First, it runs a data-flow
// analysis that tries to prove, for as many generic pointers as possible, that
// each one points into a single specific address space. In the above example,
// it can prove that %1 only points to addrspace(3). This algorithm was
// published in
//   CUDA: Compiling and optimizing for a GPU platform
//   Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
//   ICCS 2012
//
// Then, address space inference replaces all refinable generic pointers with
// equivalent specific pointers.
//
// The major challenge of implementing this optimization is handling PHINodes,
// which may create loops in the data flow graph. This brings two complications.
//
// First, the data flow analysis in Step 1 needs to handle cycles. For example,
//     %generic.input = addrspacecast float addrspace(3)* %input to float*
//   loop:
//     %y = phi [ %generic.input, %y2 ]
//     %y2 = getelementptr %y, 1
//     %v = load %y2
//     br ..., label %loop, ...
// proving %y specific requires proving both %generic.input and %y2 specific,
// but proving %y2 specific circles back to %y. To address this complication,
// the data flow analysis operates on a lattice:
//   uninitialized > specific address spaces > generic.
// All address expressions (our implementation only considers phi, bitcast,
// addrspacecast, and getelementptr) start in the uninitialized address space.
// The monotone transfer function moves the address space of a pointer down a
// lattice path from uninitialized to specific and then to generic. A join of
// two different specific address spaces pushes the expression down to the
// generic address space. The analysis completes once it reaches a fixed point.
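//
// For instance, on the loop above the analysis converges roughly as follows:
// %generic.input, %y, and %y2 all start uninitialized. The operand of
// %generic.input is in addrspace(3), so %generic.input is refined to
// addrspace(3). Then %y = join(addrspace(3), uninitialized) = addrspace(3),
// and %y2 = join(addrspace(3)) = addrspace(3). A second sweep changes nothing,
// so the fixed point assigns addrspace(3) to all three expressions.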
//
// Second, IR rewriting in Step 2 also needs to handle cycles. For example,
// converting %y to addrspace(3) requires the compiler to know the converted
// %y2, but converting %y2 needs the converted %y. To address this complication,
// we break these cycles using "undef" placeholders. When converting an
// instruction `I` to a new address space, if its operand `Op` is not converted
// yet, we let `I` temporarily use `undef` and fix all the uses of undef later.
// For instance, our algorithm first converts %y to
//   %y' = phi float addrspace(3)* [ %input, undef ]
// Then, it converts %y2 to
//   %y2' = getelementptr %y', 1
// Finally, it fixes the undef in %y' so that
//   %y' = phi float addrspace(3)* [ %input, %y2' ]
//
// TODO: This pass is experimental and not enabled by default. Users can turn
// it on by setting the -nvptx-use-infer-addrspace flag of llc. We plan to
// replace NVPTXFavorNonGenericAddrSpaces with this pass shortly.
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "nvptx-infer-addrspace"

#include "NVPTX.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

namespace {
const unsigned ADDRESS_SPACE_UNINITIALIZED = (unsigned)-1;

using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;

/// \brief NVPTXInferAddressSpaces
class NVPTXInferAddressSpaces : public FunctionPass {
public:
  static char ID;

  NVPTXInferAddressSpaces() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override;

private:
  // Returns the new address space of V if updated; otherwise, returns None.
  Optional<unsigned>
  updateAddressSpace(const Value &V,
                     const ValueToAddrSpaceMapTy &InferredAddrSpace);

  // Tries to infer the specific address space of each address expression in
  // Postorder.
  void inferAddressSpaces(const std::vector<Value *> &Postorder,
                          ValueToAddrSpaceMapTy *InferredAddrSpace);

  // Changes the generic address expressions in function F to point to specific
  // address spaces if InferredAddrSpace says so. Postorder is the postorder of
  // all generic address expressions in the use-def graph of function F.
  bool
  rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder,
                              const ValueToAddrSpaceMapTy &InferredAddrSpace,
                              Function *F);
};
} // end anonymous namespace

char NVPTXInferAddressSpaces::ID = 0;

namespace llvm {
void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
}
INITIALIZE_PASS(NVPTXInferAddressSpaces, "nvptx-infer-addrspace",
                "Infer address spaces",
                false, false)

// Returns true if V is an address expression.
// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
// getelementptr operators.
static bool isAddressExpression(const Value &V) {
  if (!isa<Operator>(V))
    return false;

  switch (cast<Operator>(V).getOpcode()) {
  case Instruction::PHI:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::GetElementPtr:
    return true;
  default:
    return false;
  }
}

// Returns the pointer operands of V.
//
// Precondition: V is an address expression.
static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
  assert(isAddressExpression(V));
  const Operator &Op = cast<Operator>(V);
  switch (Op.getOpcode()) {
  case Instruction::PHI: {
    auto IncomingValues = cast<PHINode>(Op).incoming_values();
    return SmallVector<Value *, 2>(IncomingValues.begin(),
                                   IncomingValues.end());
  }
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::GetElementPtr:
    return {Op.getOperand(0)};
  default:
    llvm_unreachable("Unexpected instruction type.");
  }
}

// If V is an unvisited generic address expression, appends V to PostorderStack
// and marks it as visited.
static void appendsGenericAddressExpressionToPostorderStack(
    Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
    DenseSet<Value *> *Visited) {
  assert(V->getType()->isPointerTy());
  if (isAddressExpression(*V) &&
      V->getType()->getPointerAddressSpace() ==
          AddressSpace::ADDRESS_SPACE_GENERIC) {
    if (Visited->insert(V).second)
      PostorderStack->push_back(std::make_pair(V, false));
  }
}

// Returns all generic address expressions in function F. The elements are
// ordered in postorder.
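//
// For instance, for the IR in the file header comment, the traversal starts at
// the load's pointer operand %1, explores its operand %0, and returns the
// postorder [%0, %1], i.e. operands before their users. @a itself is skipped
// because it is already in a specific address space.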
static std::vector<Value *> collectGenericAddressExpressions(Function &F) {
  // This function implements a non-recursive postorder traversal of a partial
  // use-def graph of function F.
  std::vector<std::pair<Value *, bool>> PostorderStack;
  // The set of visited expressions.
  DenseSet<Value *> Visited;
  // We only explore address expressions that are reachable from loads and
  // stores for now because we aim at generating faster loads and stores.
  for (Instruction &I : instructions(F)) {
    if (isa<LoadInst>(I)) {
      appendsGenericAddressExpressionToPostorderStack(
          I.getOperand(0), &PostorderStack, &Visited);
    } else if (isa<StoreInst>(I)) {
      appendsGenericAddressExpressionToPostorderStack(
          I.getOperand(1), &PostorderStack, &Visited);
    }
  }

  std::vector<Value *> Postorder; // The resultant postorder.
  while (!PostorderStack.empty()) {
    // If the operands of the expression on the top are already explored,
    // adds that expression to the resultant postorder.
    if (PostorderStack.back().second) {
      Postorder.push_back(PostorderStack.back().first);
      PostorderStack.pop_back();
      continue;
    }
    // Otherwise, adds its operands to the stack and explores them.
    PostorderStack.back().second = true;
    for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) {
      appendsGenericAddressExpressionToPostorderStack(
          PtrOperand, &PostorderStack, &Visited);
    }
  }
  return Postorder;
}

// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
// of OperandUse.get() in the new address space. If the clone is not ready yet,
// returns an undef in the new address space as a placeholder.
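//
// For instance, when cloning %y in the file header's loop example, %y2 has not
// been cloned yet, so this helper hands back `undef` in addrspace(3) and
// records the use in UndefUsesToFix for the later fix-up pass.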
static Value *operandWithNewAddressSpaceOrCreateUndef(
    const Use &OperandUse, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) {
  Value *Operand = OperandUse.get();
  if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
    return NewOperand;

  UndefUsesToFix->push_back(&OperandUse);
  return UndefValue::get(
      Operand->getType()->getPointerElementType()->getPointerTo(NewAddrSpace));
}

// Returns a clone of `I` with its operands converted to those specified in
// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
// operand whose address space needs to be modified might not exist in
// ValueWithNewAddrSpace yet. In that case, this function uses undef as a
// placeholder operand and adds that operand use to UndefUsesToFix so that the
// caller can fix it later.
//
// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
// from a pointer whose type already matches. Therefore, this function returns
// a Value* instead of an Instruction*.
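//
// For instance, for the gep %1 in the file header comment, the cloned operand
// of %0 (an addrspacecast of @a) is @a itself, so the clone of %1 is
//   %1' = gep [10 x float], [10 x float] addrspace(3)* @a, i64 0, i64 %i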
static Value *cloneInstructionWithNewAddressSpace(
    Instruction *I, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) {
  Type *NewPtrType =
      I->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (I->getOpcode() == Instruction::AddrSpaceCast) {
    Value *Src = I->getOperand(0);
    // Because `I` is generic, the source address space must be specific.
    // Therefore, the inferred address space must be the source space,
    // according to our algorithm.
    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
    if (Src->getType() != NewPtrType)
      return new BitCastInst(Src, NewPtrType);
    return Src;
  }

  // Computes the converted pointer operands.
  SmallVector<Value *, 4> NewPointerOperands;
  for (const Use &OperandUse : I->operands()) {
    if (!OperandUse.get()->getType()->isPointerTy())
      NewPointerOperands.push_back(nullptr);
    else
      NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
          OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
  }

  switch (I->getOpcode()) {
  case Instruction::BitCast:
    return new BitCastInst(NewPointerOperands[0], NewPtrType);
  case Instruction::PHI: {
    assert(I->getType()->isPointerTy());
    PHINode *PHI = cast<PHINode>(I);
    PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues());
    for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
      unsigned OperandNo = PHINode::getOperandNumForIncomingValue(Index);
      NewPHI->addIncoming(NewPointerOperands[OperandNo],
                          PHI->getIncomingBlock(Index));
    }
    return NewPHI;
  }
  case Instruction::GetElementPtr: {
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
        GEP->getSourceElementType(), NewPointerOperands[0],
        SmallVector<Value *, 4>(GEP->idx_begin(), GEP->idx_end()));
    NewGEP->setIsInBounds(GEP->isInBounds());
    return NewGEP;
  }
  default:
    llvm_unreachable("Unexpected opcode");
  }
}

// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
// constant expression `CE` with its operands replaced as specified in
// ValueWithNewAddrSpace.
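//
// For instance (a hypothetical case, assuming @a from the file header were
// addressed through a constant expression), a constant
//   gep([10 x float]* addrspacecast(@a), i64 0, i64 0)
// would be rebuilt as
//   gep([10 x float] addrspace(3)* @a, i64 0, i64 0)
// once the inner addrspacecast has been rewritten.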
static Value *cloneConstantExprWithNewAddressSpace(
    ConstantExpr *CE, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace) {
  Type *TargetType =
      CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    // Because CE is generic, the source address space must be specific.
    // Therefore, the inferred address space must be the source space according
    // to our algorithm.
    assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
           NewAddrSpace);
    return ConstantExpr::getBitCast(CE->getOperand(0), TargetType);
  }

  // Computes the operands of the new constant expression.
  SmallVector<Constant *, 4> NewOperands;
  for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
    Constant *Operand = CE->getOperand(Index);
    // If the address space of `Operand` needs to be modified, the new operand
    // with the new address space should already be in ValueWithNewAddrSpace
    // because (1) the constant expressions we consider (i.e. addrspacecast,
    // bitcast, and getelementptr) do not incur cycles in the data flow graph
    // and (2) this function is called on constant expressions in postorder.
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
      NewOperands.push_back(cast<Constant>(NewOperand));
    } else {
      // Otherwise, reuses the old operand.
      NewOperands.push_back(Operand);
    }
  }

  if (CE->getOpcode() == Instruction::GetElementPtr) {
    // Needs to specify the source type while constructing a getelementptr
    // constant expression.
    return CE->getWithOperands(
        NewOperands, TargetType, /*OnlyIfReduced=*/false,
        NewOperands[0]->getType()->getPointerElementType());
  }

  return CE->getWithOperands(NewOperands, TargetType);
}

// Returns a clone of the value `V`, with its operands replaced as specified in
// ValueWithNewAddrSpace. This function is called on every generic address
// expression whose address space needs to be modified, in postorder.
//
// See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
static Value *
cloneValueWithNewAddressSpace(Value *V, unsigned NewAddrSpace,
                              const ValueToValueMapTy &ValueWithNewAddrSpace,
                              SmallVectorImpl<const Use *> *UndefUsesToFix) {
  // All values in Postorder are generic address expressions.
  assert(isAddressExpression(*V) &&
         V->getType()->getPointerAddressSpace() ==
             AddressSpace::ADDRESS_SPACE_GENERIC);

  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Value *NewV = cloneInstructionWithNewAddressSpace(
        I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
    if (Instruction *NewI = dyn_cast<Instruction>(NewV)) {
      if (NewI->getParent() == nullptr) {
        NewI->insertBefore(I);
        NewI->takeName(I);
      }
    }
    return NewV;
  }

  return cloneConstantExprWithNewAddressSpace(
      cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
}

// Defines the join operation on the address space lattice (see the file header
// comments).
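//
// For example:
//   join(uninitialized, shared)  = shared
//   join(shared, shared)         = shared
//   join(shared, global)         = generic
//   join(generic, <anything>)    = generic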
static unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) {
  if (AS1 == AddressSpace::ADDRESS_SPACE_GENERIC ||
      AS2 == AddressSpace::ADDRESS_SPACE_GENERIC)
    return AddressSpace::ADDRESS_SPACE_GENERIC;

  if (AS1 == ADDRESS_SPACE_UNINITIALIZED)
    return AS2;
  if (AS2 == ADDRESS_SPACE_UNINITIALIZED)
    return AS1;

  // The join of two different specific address spaces is generic.
  return AS1 == AS2 ? AS1 : (unsigned)AddressSpace::ADDRESS_SPACE_GENERIC;
}

bool NVPTXInferAddressSpaces::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // Collects all generic address expressions in postorder.
  std::vector<Value *> Postorder = collectGenericAddressExpressions(F);

  // Runs a data-flow analysis to refine the address spaces of every expression
  // in Postorder.
  ValueToAddrSpaceMapTy InferredAddrSpace;
  inferAddressSpaces(Postorder, &InferredAddrSpace);

  // Changes the address spaces of the generic address expressions that are
  // inferred to point to a specific address space.
  return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
}

void NVPTXInferAddressSpaces::inferAddressSpaces(
    const std::vector<Value *> &Postorder,
    ValueToAddrSpaceMapTy *InferredAddrSpace) {
  SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
  // Initially, all expressions are in the uninitialized address space.
  for (Value *V : Postorder)
    (*InferredAddrSpace)[V] = ADDRESS_SPACE_UNINITIALIZED;

  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();

    // Tries to update the address space of V according to the address spaces
    // of its operands.
    DEBUG(dbgs() << "Updating the address space of\n"
                 << "  " << *V << "\n");
    Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
    if (!NewAS.hasValue())
      continue;
    // If the address space changed, adds V's users to the worklist because
    // their address spaces may need to be updated as well.
    DEBUG(dbgs() << "  to " << NewAS.getValue() << "\n");
    (*InferredAddrSpace)[V] = NewAS.getValue();

    for (Value *User : V->users()) {
      // Skip if User is already in the worklist.
      if (Worklist.count(User))
        continue;

      auto Pos = InferredAddrSpace->find(User);
      // Our algorithm only updates the address spaces of generic address
      // expressions, which are those in InferredAddrSpace.
      if (Pos == InferredAddrSpace->end())
        continue;

      // Function updateAddressSpace moves the address space down a lattice
      // path. Therefore, nothing to do if User is already inferred as
      // generic (the bottom element in the lattice).
      if (Pos->second == AddressSpace::ADDRESS_SPACE_GENERIC)
        continue;

      Worklist.insert(User);
    }
  }
}

Optional<unsigned> NVPTXInferAddressSpaces::updateAddressSpace(
    const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) {
  assert(InferredAddrSpace.count(&V));

  // The new inferred address space equals the join of the address spaces
  // of all its pointer operands.
  unsigned NewAS = ADDRESS_SPACE_UNINITIALIZED;
  for (Value *PtrOperand : getPointerOperands(V)) {
    unsigned OperandAS;
    if (InferredAddrSpace.count(PtrOperand))
      OperandAS = InferredAddrSpace.lookup(PtrOperand);
    else
      OperandAS = PtrOperand->getType()->getPointerAddressSpace();
    NewAS = joinAddressSpaces(NewAS, OperandAS);
    // join(generic, *) = generic. So we can break if NewAS is already generic.
    if (NewAS == AddressSpace::ADDRESS_SPACE_GENERIC)
      break;
  }

  unsigned OldAS = InferredAddrSpace.lookup(&V);
  assert(OldAS != AddressSpace::ADDRESS_SPACE_GENERIC);
  if (OldAS == NewAS)
    return None;
  return NewAS;
}

bool NVPTXInferAddressSpaces::rewriteWithNewAddressSpaces(
    const std::vector<Value *> &Postorder,
    const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) {
  // For each address expression to be modified, creates a clone of it with its
  // pointer operands converted to the new address space. Since the pointer
  // operands are converted, the clone is naturally in the new address space by
  // construction.
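  //
  // For the file header's example, %0 (the addrspacecast of @a) is cloned to
  // @a itself and %1 is cloned to an addrspace(3) gep; the load's pointer
  // operand is then redirected to the new gep when uses are replaced below.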
  ValueToValueMapTy ValueWithNewAddrSpace;
  SmallVector<const Use *, 32> UndefUsesToFix;
  for (Value *V : Postorder) {
    unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
    if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
      ValueWithNewAddrSpace[V] = cloneValueWithNewAddressSpace(
          V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
    }
  }

  if (ValueWithNewAddrSpace.empty())
    return false;

  // Fixes all the undef uses generated by cloneInstructionWithNewAddressSpace.
  for (const Use *UndefUse : UndefUsesToFix) {
    User *V = UndefUse->getUser();
    User *NewV = cast<User>(ValueWithNewAddrSpace.lookup(V));
    unsigned OperandNo = UndefUse->getOperandNo();
    assert(isa<UndefValue>(NewV->getOperand(OperandNo)));
    NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
  }

  // Replaces the uses of the old address expressions with the new ones.
  for (Value *V : Postorder) {
    Value *NewV = ValueWithNewAddrSpace.lookup(V);
    if (NewV == nullptr)
      continue;

    SmallVector<Use *, 4> Uses;
    for (Use &U : V->uses())
      Uses.push_back(&U);
    DEBUG(dbgs() << "Replacing the uses of " << *V << "\n  to\n  " << *NewV
                 << "\n");
    for (Use *U : Uses) {
      if (isa<LoadInst>(U->getUser()) ||
          (isa<StoreInst>(U->getUser()) && U->getOperandNo() == 1)) {
        // If V is used as the pointer operand of a load/store, sets the
        // pointer operand to NewV. This replacement does not change the
        // element type, so the resultant load/store is still valid.
        U->set(NewV);
      } else if (isa<Instruction>(U->getUser())) {
        // Otherwise, replaces the use with an addrspacecast of NewV back to
        // the generic address space.
        // TODO: Some optimization opportunities are missed. For example, in
        //   %0 = icmp eq float* %p, %q
        // if both p and q are inferred to be shared, we can rewrite %0 as
        //   %0 = icmp eq float addrspace(3)* %new_p, %new_q
        // instead of currently
        //   %generic_p = addrspacecast float addrspace(3)* %new_p to float*
        //   %generic_q = addrspacecast float addrspace(3)* %new_q to float*
        //   %0 = icmp eq float* %generic_p, %generic_q
        if (Instruction *I = dyn_cast<Instruction>(V)) {
          BasicBlock::iterator InsertPos = std::next(I->getIterator());
          while (isa<PHINode>(InsertPos))
            ++InsertPos;
          U->set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
        } else {
          U->set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
                                                V->getType()));
        }
      }
    }
    if (V->use_empty())
      RecursivelyDeleteTriviallyDeadInstructions(V);
  }

  return true;
}

FunctionPass *llvm::createNVPTXInferAddressSpacesPass() {
  return new NVPTXInferAddressSpaces();
}