Blame - llvm/lib/Transforms/Scalar/NaryReassociate.cpp - toolchain/llvm-project

blob: d6bc925955b75d5f9563e3230cdf5c6c5f5c8014 [file] [log] [blame]

Jingyue Wu	8cb6b2a	2015-04-14 04:59:22 +0000	[diff] [blame]	1	//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This pass reassociates n-ary add expressions and eliminates the redundancy
				11	// exposed by the reassociation.
				12	//
				13	// A motivating example:
				14	//
				15	// void foo(int a, int b) {
				16	// bar(a + b);
				17	// bar((a + 2) + b);
				18	// }
				19	//
				20	// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
				21	// the above code to
				22	//
				23	// int t = a + b;
				24	// bar(t);
				25	// bar(t + 2);
				26	//
				27	// However, the Reassociate pass is unable to do that because it processes each
				28	// instruction individually and believes (a + 2) + b is the best form according
				29	// to its rank system.
				30	//
				31	// To address this limitation, NaryReassociate reassociates an expression in a
				32	// form that reuses existing instructions. As a result, NaryReassociate can
				33	// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that
				34	// (a + b) is computed before.
				35	//
				36	// NaryReassociate works as follows. For every instruction in the form of (a +
				37	// b) + c, it checks whether a + c or b + c is already computed by a dominating
				38	// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b +
				39	// c) + a respectively. To efficiently look up whether an expression is
				40	// computed before, we store each instruction seen and its SCEV into an
				41	// SCEV-to-instruction map.
				42	//
				43	// Although the algorithm pattern-matches only ternary additions, it
				44	// automatically handles many >3-ary expressions by walking through the function
				45	// in the depth-first order. For example, given
				46	//
				47	// (a + c) + d
				48	// ((a + b) + c) + d
				49	//
				50	// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
				51	// ((a + c) + b) + d into ((a + c) + d) + b.
				52	//
				53	// Limitations and TODO items:
				54	//
				55	// 1) We only consider n-ary adds for now. This should be extended and
				56	// generalized.
				57	//
				58	// 2) Besides arithmetic operations, similar reassociation can be applied to
				59	// GEPs. For example, if
				60	// X = &arr[a]
				61	// dominates
				62	// Y = &arr[a + b]
				63	// we may rewrite Y into X + b.
				64	//
				65	//===----------------------------------------------------------------------===//
				66
				67	#include "llvm/Analysis/ScalarEvolution.h"
				68	#include "llvm/IR/Dominators.h"
				69	#include "llvm/IR/Module.h"
				70	#include "llvm/IR/PatternMatch.h"
				71	#include "llvm/Transforms/Scalar.h"
				72	using namespace llvm;
				73	using namespace PatternMatch;
				74
				75	#define DEBUG_TYPE "nary-reassociate"
				76
				77	namespace {
				78	class NaryReassociate : public FunctionPass {
				79	public:
				80	static char ID;
				81
				82	NaryReassociate(): FunctionPass(ID) {
				83	initializeNaryReassociatePass(*PassRegistry::getPassRegistry());
				84	}
				85
				86	bool runOnFunction(Function &F) override;
				87
				88	void getAnalysisUsage(AnalysisUsage &AU) const override {
				89	AU.addPreserved<DominatorTreeWrapperPass>();
				90	AU.addRequired<DominatorTreeWrapperPass>();
				91	// TODO: can we preserve ScalarEvolution?
				92	AU.addRequired<ScalarEvolution>();
				93	AU.setPreservesCFG();
				94	}
				95
				96	private:
				97	// Reasssociates I to a better form.
				98	Instruction tryReassociateAdd(Instruction I);
				99	// A helper function for tryReassociateAdd. LHS and RHS are explicitly passed.
				100	Instruction tryReassociateAdd(Value LHS, Value RHS, Instruction I);
				101	// Rewrites I to LHS + RHS if LHS is computed already.
				102	Instruction tryReassociatedAdd(const SCEV LHS, Value RHS, Instruction I);
				103
				104	DominatorTree *DT;
				105	ScalarEvolution *SE;
				106	// A lookup table quickly telling which instructions compute the given SCEV.
				107	// Note that there can be multiple instructions at different locations
Jingyue Wu	771dfe9	2015-04-16 18:42:31 +0000	[diff] [blame^]	108	// computing to the same SCEV, so we map a SCEV to an instruction list. For
				109	// example,
				110	//
Jingyue Wu	8cb6b2a	2015-04-14 04:59:22 +0000	[diff] [blame]	111	// if (p1)
				112	// foo(a + b);
				113	// if (p2)
				114	// bar(a + b);
				115	DenseMap<const SCEV , SmallVector<Instruction , 2>> SeenExprs;
				116	};
				117	} // anonymous namespace
				118
				119	char NaryReassociate::ID = 0;
				120	INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
				121	false, false)
				122	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
				123	INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
				124	INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
				125	false, false)
				126
				127	FunctionPass *llvm::createNaryReassociatePass() {
				128	return new NaryReassociate();
				129	}
				130
				131	bool NaryReassociate::runOnFunction(Function &F) {
				132	if (skipOptnoneFunction(F))
				133	return false;
				134
				135	DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
				136	SE = &getAnalysis<ScalarEvolution>();
				137
				138	// Traverse the dominator tree in the depth-first order. This order makes sure
				139	// all bases of a candidate are in Candidates when we process it.
				140	bool Changed = false;
				141	SeenExprs.clear();
				142	for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
				143	Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
				144	BasicBlock *BB = Node->getBlock();
				145	for (auto I = BB->begin(); I != BB->end(); ++I) {
				146	if (I->getOpcode() == Instruction::Add) {
				147	if (Instruction *NewI = tryReassociateAdd(I)) {
				148	I->replaceAllUsesWith(NewI);
				149	I->eraseFromParent();
				150	I = NewI;
				151	}
				152	// We should add the rewritten instruction because tryReassociateAdd may
				153	// have invalidated the original one.
				154	SeenExprs[SE->getSCEV(I)].push_back(I);
				155	}
				156	}
				157	}
				158	return Changed;
				159	}
				160
				161	Instruction NaryReassociate::tryReassociateAdd(Instruction I) {
				162	Value LHS = I->getOperand(0), RHS = I->getOperand(1);
				163	if (auto *NewI = tryReassociateAdd(LHS, RHS, I))
				164	return NewI;
				165	if (auto *NewI = tryReassociateAdd(RHS, LHS, I))
				166	return NewI;
				167	return nullptr;
				168	}
				169
				170	Instruction NaryReassociate::tryReassociateAdd(Value LHS, Value *RHS,
				171	Instruction *I) {
				172	Value A = nullptr, B = nullptr;
				173	// To be conservative, we reassociate I only when it is the only user of A+B.
				174	if (LHS->hasOneUse() && match(LHS, m_Add(m_Value(A), m_Value(B)))) {
				175	// I = (A + B) + RHS
				176	// = (A + RHS) + B or (B + RHS) + A
				177	const SCEV AExpr = SE->getSCEV(A), BExpr = SE->getSCEV(B);
				178	const SCEV *RHSExpr = SE->getSCEV(RHS);
				179	if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I))
				180	return NewI;
				181	if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I))
				182	return NewI;
				183	}
				184	return nullptr;
				185	}
				186
				187	Instruction NaryReassociate::tryReassociatedAdd(const SCEV LHSExpr,
				188	Value RHS, Instruction I) {
				189	auto Pos = SeenExprs.find(LHSExpr);
				190	// Bail out if LHSExpr is not previously seen.
				191	if (Pos == SeenExprs.end())
				192	return nullptr;
				193
				194	auto &LHSCandidates = Pos->second;
Jingyue Wu	771dfe9	2015-04-16 18:42:31 +0000	[diff] [blame^]	195	// Look for the closest dominator LHS of I that computes LHSExpr, and replace
				196	// I with LHS + RHS.
				197	//
				198	// Because we traverse the dominator tree in the pre-order, a
				199	// candidate that doesn't dominate the current instruction won't dominate any
				200	// future instruction either. Therefore, we pop it out of the stack. This
				201	// optimization makes the algorithm O(n).
				202	while (!LHSCandidates.empty()) {
				203	Instruction *LHS = LHSCandidates.back();
				204	if (DT->dominates(LHS, I)) {
				205	Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
Jingyue Wu	8cb6b2a	2015-04-14 04:59:22 +0000	[diff] [blame]	206	NewI->takeName(I);
				207	return NewI;
				208	}
Jingyue Wu	771dfe9	2015-04-16 18:42:31 +0000	[diff] [blame^]	209	LHSCandidates.pop_back();
Jingyue Wu	8cb6b2a	2015-04-14 04:59:22 +0000	[diff] [blame]	210	}
				211	return nullptr;
				212	}