llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp - toolchain/llvm-project - Gitiles

 //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 /// \file
 /// This pass adds amdgpu.uniform metadata to IR values so this information
 /// can be used during instruction selection.
 //
 //===----------------------------------------------------------------------===//

 #include "AMDGPU.h"
 #include "AMDGPUIntrinsicInfo.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"

 #define DEBUG_TYPE "amdgpu-annotate-uniform"

 using namespace llvm;

 namespace {

 /// Function pass that attaches "amdgpu.uniform" metadata to conditional
 /// branches with a uniform condition and to the instructions producing the
 /// pointer of uniform loads.  Inside kernels, pointers of loads that are not
 /// clobbered within the function additionally get "amdgpu.noclobber".
 ///
 /// The class is also an InstVisitor; runOnFunction() calls visit() and the
 /// visitBranchInst()/visitLoadInst() callbacks attach the metadata.
 class AMDGPUAnnotateUniformValues : public FunctionPass,
                        public InstVisitor<AMDGPUAnnotateUniformValues> {
   // Analysis results, assigned at the start of every runOnFunction().
   // Null-initialized so that they never hold an indeterminate value.
   DivergenceAnalysis *DA = nullptr;
   MemoryDependenceResults *MDR = nullptr;
   LoopInfo *LI = nullptr;
   // Maps a non-instruction pointer (Argument or GlobalValue) to the GEP
   // created to carry its metadata.  Cleared at the end of runOnFunction().
   DenseMap<Value*, GetElementPtrInst*> noClobberClones;
   // True while processing a function with the AMDGPU_KERNEL calling
   // convention.
   bool isKernelFunc = false;
   // Address-space description, assigned in doInitialization().
   AMDGPUAS AMDGPUASI;

 public:
   static char ID;
   AMDGPUAnnotateUniformValues() :
     FunctionPass(ID) { }
   bool doInitialization(Module &M) override;
   bool runOnFunction(Function &F) override;
   StringRef getPassName() const override {
     return "AMDGPU Annotate Uniform Values";
   }
   /// Requires divergence, memory-dependence and loop analyses and declares
   /// that all analyses are preserved.
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<DivergenceAnalysis>();
     AU.addRequired<MemoryDependenceWrapperPass>();
     AU.addRequired<LoopInfoWrapperPass>();
     AU.setPreservesAll();
   }

   void visitBranchInst(BranchInst &I);
   void visitLoadInst(LoadInst &I);
   /// Returns true if the location read by \p Load may be clobbered in the
   /// current function (or if that cannot be determined).
   bool isClobberedInFunction(LoadInst * Load);
 };

 } // End anonymous namespace

 // Pass registration boilerplate.  The dependencies declared between the
 // BEGIN and END macros are the same three analyses that getAnalysisUsage()
 // marks as required.
 INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                       "Add AMDGPU uniform metadata", false, false)
 INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                     "Add AMDGPU uniform metadata", false, false)

 // Definition of the static ID member that the constructor hands to
 // FunctionPass.
 char AMDGPUAnnotateUniformValues::ID = 0;

 /// Attach an empty "amdgpu.uniform" metadata node to \p I.
 static void setUniformMetadata(Instruction *I) {
   MDNode *EmptyNode = MDNode::get(I->getContext(), {});
   I->setMetadata("amdgpu.uniform", EmptyNode);
 }
 /// Attach an empty "amdgpu.noclobber" metadata node to \p I.
 static void setNoClobberMetadata(Instruction *I) {
   MDNode *EmptyNode = MDNode::get(I->getContext(), {});
   I->setMetadata("amdgpu.noclobber", EmptyNode);
 }

 /// Add to \p Set every block reachable from \p Root by following predecessor
 /// edges, without expanding blocks that were already in \p Set on entry.
 /// \p Root itself is only inserted if it shows up as one of those
 /// predecessors.
 ///
 /// \p Set doubles as the worklist, so the native stack depth stays constant
 /// instead of growing with the length of the predecessor chains.  The final
 /// contents of \p Set match the recursive formulation; only the order in
 /// which newly found blocks are appended differs.
 static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) {
   // Every element at index >= Next was appended by this call and still
   // needs its predecessors expanded.
   size_t Next = Set.size();
   for (BasicBlock *Pred : predecessors(Root))
     Set.insert(Pred);
   for (; Next != Set.size(); ++Next) {
     // Copy the pointer first: the inserts below may reallocate Set's storage.
     BasicBlock *BB = Set[Next];
     for (BasicBlock *Pred : predecessors(BB))
       Set.insert(Pred);
   }
 }

 /// Conservatively decide whether the location read by \p Load may be
 /// clobbered within the current function.
 ///
 /// The candidate blocks are the load's own block, every block of the
 /// outermost loop containing it (if there is one), and all blocks found by
 /// walking predecessor edges from the starting block.  Returns true as soon
 /// as the memory-dependence query for any candidate block reports a clobber
 /// or an unknown result, and false otherwise.
 bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
   BasicBlock *LoadBB = Load->getParent();
   const Value *Address = Load->getPointerOperand();

   SetVector<BasicBlock *> Candidates;
   Candidates.insert(LoadBB);

   // Climb to the outermost loop enclosing the load, if any.
   const Loop *Outermost = LI->getLoopFor(LoadBB);
   if (Outermost)
     while (const Loop *Parent = Outermost->getParentLoop())
       Outermost = Parent;

   // Inside a loop all loop blocks are candidates and the predecessor walk
   // starts at the loop header; otherwise it starts at the load's block.
   BasicBlock *WalkRoot = LoadBB;
   if (Outermost) {
     Candidates.insert(Outermost->block_begin(), Outermost->block_end());
     WalkRoot = Outermost->getHeader();
   }
   DFS(WalkRoot, Candidates);

   for (BasicBlock *BB : Candidates) {
     // Only when the load is not inside a loop does the query for its own
     // block start at the load; every other query starts at the block's end.
     BasicBlock::iterator ScanFrom = BB->end();
     if (!Outermost && BB == LoadBB)
       ScanFrom = BasicBlock::iterator(Load);

     auto Dep = MDR->getPointerDependencyFrom(MemoryLocation(Address), true,
                                              ScanFrom, BB, Load);
     if (Dep.isClobber() || Dep.isUnknown())
       return true;
   }
   return false;
 }

 /// Tag a conditional branch whose condition is uniform with "amdgpu.uniform"
 /// metadata.  Unconditional branches and branches on a non-uniform condition
 /// are left untouched.
 void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
   const bool HasUniformCond =
       !I.isUnconditional() && DA->isUniform(I.getCondition());
   if (HasUniformCond)
     setUniformMetadata(I.getParent()->getTerminator());
 }

 /// Annotate the pointer of a load whose address is uniform.
 ///
 /// The metadata is placed on the instruction that produces the pointer.  If
 /// the pointer is not an instruction but an Argument or GlobalValue, and the
 /// load is a not-clobbered global-address-space load in a kernel, a
 /// zero-index GEP of the pointer is inserted in the entry block and the load
 /// is rewritten to use it, so that the GEP can carry the metadata.  At most
 /// one such GEP is created per pointer per function; it is remembered in
 /// noClobberClones and reused by later loads of the same pointer.
 void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
   Value *Ptr = I.getPointerOperand();
   if (!DA->isUniform(Ptr))
     return;
   auto isGlobalLoad = [&](LoadInst &Load)->bool {
     return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
   };
   // We're tracking up to the Function boundaries
   // We cannot go beyond because of FunctionPass restrictions
   // Thus we can ensure that memory not clobbered for memory
   // operations that live in kernel only.
   bool NotClobbered = isKernelFunc && !isClobberedInFunction(&I);
   Instruction *PtrI = dyn_cast<Instruction>(Ptr);
   if (!PtrI && NotClobbered && isGlobalLoad(I)) {
     if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
       // Single lookup: a missing entry is default-inserted as null and
       // filled in below, so the clone is actually cached for reuse.
       GetElementPtrInst *&Clone = noClobberClones[Ptr];
       if (!Clone) {
         // Create GEP of the Value
         Function *F = I.getParent()->getParent();
         // NOTE(review): the type passed is i32 while the APInt is 64 bits
         // wide - confirm which width the resulting constant ends up with.
         Value *Idx = Constant::getIntegerValue(
           Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
         // Insert GEP at the entry to make it dominate all uses
         Clone = GetElementPtrInst::Create(
           Ptr->getType()->getPointerElementType(), Ptr,
           ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI());
       }
       PtrI = Clone;
       I.replaceUsesOfWith(Ptr, PtrI);
     }
   }

   if (PtrI) {
     setUniformMetadata(PtrI);
     if (NotClobbered)
       setNoClobberMetadata(PtrI);
   }
 }

 /// Fetch the address-space description for module \p M and store it in
 /// AMDGPUASI.  Always returns false.
 bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
   const bool ModuleChanged = false;
   AMDGPUASI = AMDGPU::getAMDGPUAS(M);
   return ModuleChanged;
 }

 /// Pass entry point: fetch the required analyses, record whether \p F is a
 /// kernel, visit every instruction, then drop the per-function GEP cache.
 /// Returns false if the function is skipped and true otherwise.
 bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
   if (skipFunction(F))
     return false;

   isKernelFunc = (CallingConv::AMDGPU_KERNEL == F.getCallingConv());
   LI  = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
   DA  = &getAnalysis<DivergenceAnalysis>();

   visit(F);

   // The cached GEPs belong to F only; they must not leak into the next run.
   noClobberClones.clear();
   return true;
 }

 /// Factory for the pass.  The instance is allocated with new and returned
 /// as a raw pointer; this function does not delete it.
 FunctionPass *llvm::createAMDGPUAnnotateUniformValues() {
   FunctionPass *Pass = new AMDGPUAnnotateUniformValues();
   return Pass;
 }
	//===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//
	//
	/// \file
	/// This pass adds amdgpu.uniform metadata to IR values so this information
	/// can be used during instruction selection.
	//
	//===----------------------------------------------------------------------===//

	#include "AMDGPU.h"
	#include "AMDGPUIntrinsicInfo.h"
	#include "llvm/ADT/SetVector.h"
	#include "llvm/Analysis/DivergenceAnalysis.h"
	#include "llvm/Analysis/LoopInfo.h"
	#include "llvm/Analysis/MemoryDependenceAnalysis.h"
	#include "llvm/IR/IRBuilder.h"
	#include "llvm/IR/InstVisitor.h"
	#include "llvm/Support/Debug.h"
	#include "llvm/Support/raw_ostream.h"

	#define DEBUG_TYPE "amdgpu-annotate-uniform"

	using namespace llvm;

	namespace {

	/// Function pass that attaches "amdgpu.uniform" metadata to conditional
	/// branches with a uniform condition and to the instructions producing the
	/// pointer of uniform loads.  Inside kernels, pointers of loads that are not
	/// clobbered within the function additionally get "amdgpu.noclobber".
	///
	/// The class is also an InstVisitor; runOnFunction() calls visit() and the
	/// visitBranchInst()/visitLoadInst() callbacks attach the metadata.
	class AMDGPUAnnotateUniformValues : public FunctionPass,
	                       public InstVisitor<AMDGPUAnnotateUniformValues> {
	  // Analysis results, assigned at the start of every runOnFunction().
	  // Null-initialized so that they never hold an indeterminate value.
	  DivergenceAnalysis *DA = nullptr;
	  MemoryDependenceResults *MDR = nullptr;
	  LoopInfo *LI = nullptr;
	  // Maps a non-instruction pointer (Argument or GlobalValue) to the GEP
	  // created to carry its metadata.  Cleared at the end of runOnFunction().
	  DenseMap<Value*, GetElementPtrInst*> noClobberClones;
	  // True while processing a function with the AMDGPU_KERNEL calling
	  // convention.
	  bool isKernelFunc = false;
	  // Address-space description, assigned in doInitialization().
	  AMDGPUAS AMDGPUASI;

	public:
	  static char ID;
	  AMDGPUAnnotateUniformValues() :
	    FunctionPass(ID) { }
	  bool doInitialization(Module &M) override;
	  bool runOnFunction(Function &F) override;
	  StringRef getPassName() const override {
	    return "AMDGPU Annotate Uniform Values";
	  }
	  /// Requires divergence, memory-dependence and loop analyses and declares
	  /// that all analyses are preserved.
	  void getAnalysisUsage(AnalysisUsage &AU) const override {
	    AU.addRequired<DivergenceAnalysis>();
	    AU.addRequired<MemoryDependenceWrapperPass>();
	    AU.addRequired<LoopInfoWrapperPass>();
	    AU.setPreservesAll();
	  }

	  void visitBranchInst(BranchInst &I);
	  void visitLoadInst(LoadInst &I);
	  /// Returns true if the location read by \p Load may be clobbered in the
	  /// current function (or if that cannot be determined).
	  bool isClobberedInFunction(LoadInst * Load);
	};

	} // End anonymous namespace

	// Pass registration boilerplate.  The dependencies declared between the
	// BEGIN and END macros are the same three analyses that getAnalysisUsage()
	// marks as required.
	INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
	"Add AMDGPU uniform metadata", false, false)
	INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
	INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
	INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
	INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
	"Add AMDGPU uniform metadata", false, false)

	// Definition of the static ID member that the constructor hands to
	// FunctionPass.
	char AMDGPUAnnotateUniformValues::ID = 0;

	/// Attach an empty "amdgpu.uniform" metadata node to \p I.
	static void setUniformMetadata(Instruction *I) {
	  MDNode *EmptyNode = MDNode::get(I->getContext(), {});
	  I->setMetadata("amdgpu.uniform", EmptyNode);
	}
	/// Attach an empty "amdgpu.noclobber" metadata node to \p I.
	static void setNoClobberMetadata(Instruction *I) {
	  MDNode *EmptyNode = MDNode::get(I->getContext(), {});
	  I->setMetadata("amdgpu.noclobber", EmptyNode);
	}

	static void DFS(BasicBlock Root, SetVector<BasicBlock> & Set) {
	for (auto I : predecessors(Root))
	if (Set.insert(I))
	DFS(I, Set);
	}

	/// Conservatively decide whether the location read by \p Load may be
	/// clobbered within the current function.
	///
	/// The candidate blocks are the load's own block, every block of the
	/// outermost loop containing it (if there is one), and all blocks found by
	/// walking predecessor edges from the starting block.  Returns true as soon
	/// as the memory-dependence query for any candidate block reports a clobber
	/// or an unknown result, and false otherwise.
	bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
	  BasicBlock *LoadBB = Load->getParent();
	  const Value *Address = Load->getPointerOperand();

	  SetVector<BasicBlock *> Candidates;
	  Candidates.insert(LoadBB);

	  // Climb to the outermost loop enclosing the load, if any.
	  const Loop *Outermost = LI->getLoopFor(LoadBB);
	  if (Outermost)
	    while (const Loop *Parent = Outermost->getParentLoop())
	      Outermost = Parent;

	  // Inside a loop all loop blocks are candidates and the predecessor walk
	  // starts at the loop header; otherwise it starts at the load's block.
	  BasicBlock *WalkRoot = LoadBB;
	  if (Outermost) {
	    Candidates.insert(Outermost->block_begin(), Outermost->block_end());
	    WalkRoot = Outermost->getHeader();
	  }
	  DFS(WalkRoot, Candidates);

	  for (BasicBlock *BB : Candidates) {
	    // Only when the load is not inside a loop does the query for its own
	    // block start at the load; every other query starts at the block's end.
	    BasicBlock::iterator ScanFrom = BB->end();
	    if (!Outermost && BB == LoadBB)
	      ScanFrom = BasicBlock::iterator(Load);

	    auto Dep = MDR->getPointerDependencyFrom(MemoryLocation(Address), true,
	                                             ScanFrom, BB, Load);
	    if (Dep.isClobber() || Dep.isUnknown())
	      return true;
	  }
	  return false;
	}

	/// Tag a conditional branch whose condition is uniform with "amdgpu.uniform"
	/// metadata.  Unconditional branches and branches on a non-uniform condition
	/// are left untouched.
	void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
	  const bool HasUniformCond =
	      !I.isUnconditional() && DA->isUniform(I.getCondition());
	  if (HasUniformCond)
	    setUniformMetadata(I.getParent()->getTerminator());
	}

	/// Annotate the pointer of a load whose address is uniform.
	///
	/// The metadata is placed on the instruction that produces the pointer.  If
	/// the pointer is not an instruction but an Argument or GlobalValue, and the
	/// load is a not-clobbered global-address-space load in a kernel, a
	/// zero-index GEP of the pointer is inserted in the entry block and the load
	/// is rewritten to use it, so that the GEP can carry the metadata.  At most
	/// one such GEP is created per pointer per function; it is remembered in
	/// noClobberClones and reused by later loads of the same pointer.
	void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
	  Value *Ptr = I.getPointerOperand();
	  if (!DA->isUniform(Ptr))
	    return;
	  auto isGlobalLoad = [&](LoadInst &Load)->bool {
	    return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
	  };
	  // We're tracking up to the Function boundaries
	  // We cannot go beyond because of FunctionPass restrictions
	  // Thus we can ensure that memory not clobbered for memory
	  // operations that live in kernel only.
	  bool NotClobbered = isKernelFunc && !isClobberedInFunction(&I);
	  Instruction *PtrI = dyn_cast<Instruction>(Ptr);
	  if (!PtrI && NotClobbered && isGlobalLoad(I)) {
	    if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
	      // Single lookup: a missing entry is default-inserted as null and
	      // filled in below, so the clone is actually cached for reuse.
	      GetElementPtrInst *&Clone = noClobberClones[Ptr];
	      if (!Clone) {
	        // Create GEP of the Value
	        Function *F = I.getParent()->getParent();
	        // NOTE(review): the type passed is i32 while the APInt is 64 bits
	        // wide - confirm which width the resulting constant ends up with.
	        Value *Idx = Constant::getIntegerValue(
	          Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
	        // Insert GEP at the entry to make it dominate all uses
	        Clone = GetElementPtrInst::Create(
	          Ptr->getType()->getPointerElementType(), Ptr,
	          ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI());
	      }
	      PtrI = Clone;
	      I.replaceUsesOfWith(Ptr, PtrI);
	    }
	  }

	  if (PtrI) {
	    setUniformMetadata(PtrI);
	    if (NotClobbered)
	      setNoClobberMetadata(PtrI);
	  }
	}

	/// Fetch the address-space description for module \p M and store it in
	/// AMDGPUASI.  Always returns false.
	bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
	  const bool ModuleChanged = false;
	  AMDGPUASI = AMDGPU::getAMDGPUAS(M);
	  return ModuleChanged;
	}

	/// Pass entry point: fetch the required analyses, record whether \p F is a
	/// kernel, visit every instruction, then drop the per-function GEP cache.
	/// Returns false if the function is skipped and true otherwise.
	bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
	  if (skipFunction(F))
	    return false;

	  isKernelFunc = (CallingConv::AMDGPU_KERNEL == F.getCallingConv());
	  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
	  MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
	  DA = &getAnalysis<DivergenceAnalysis>();

	  visit(F);

	  // The cached GEPs belong to F only; they must not leak into the next run.
	  noClobberClones.clear();
	  return true;
	}

	/// Factory for the pass.  The instance is allocated with new and returned
	/// as a raw pointer; this function does not delete it.
	FunctionPass *llvm::createAMDGPUAnnotateUniformValues() {
	  FunctionPass *Pass = new AMDGPUAnnotateUniformValues();
	  return Pass;
	}