Blame - llvm/lib/Transforms/Scalar/MergeICmps.cpp - toolchain/llvm-project

blob: 5c630d99924b0c5fa06dbf97fd56f6081abac534 [file] [log] [blame]

Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	1	//===- MergeICmps.cpp - Optimize chains of integer comparisons ------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This pass turns chains of integer comparisons into memcmp (the memcmp is
				11	// later typically inlined as a chain of efficient hardware comparisons). This
				12	// typically benefits c++ member or nonmember operator==().
				13	//
				14	// The basic idea is to replace a larger chain of integer comparisons loaded
				15	// from contiguous memory locations into a smaller chain of such integer
				16	// comparisons. Benefits are double:
				17	// - There are fewer jumps, and therefore fewer opportunities for
				18	// mispredictions and I-cache misses.
				19	// - Code size is smaller, both because jumps are removed and because the
				20	// encoding of a 2*n byte compare is smaller than that of two n-byte
				21	// compares.
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	22
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	23	//===----------------------------------------------------------------------===//
				24
Eugene Zelenko	5adb96c	2017-10-26 00:55:39 +0000	[diff] [blame]	25	#include <algorithm>
Eugene Zelenko	5adb96c	2017-10-26 00:55:39 +0000	[diff] [blame]	26	#include <numeric>
				27	#include <utility>
				28	#include <vector>
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	29	#include "llvm/Analysis/Loads.h"
				30	#include "llvm/Analysis/TargetLibraryInfo.h"
				31	#include "llvm/Analysis/TargetTransformInfo.h"
				32	#include "llvm/IR/Function.h"
				33	#include "llvm/IR/IRBuilder.h"
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	34	#include "llvm/Pass.h"
				35	#include "llvm/Transforms/Scalar.h"
				36	#include "llvm/Transforms/Utils/BuildLibCalls.h"
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	37
				38	using namespace llvm;
				39
Eugene Zelenko	5adb96c	2017-10-26 00:55:39 +0000	[diff] [blame]	40	namespace {
				41
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	42	#define DEBUG_TYPE "mergeicmps"
				43
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	44	// A BCE atom.
				45	struct BCEAtom {
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	46	BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {}
Clement Courbet	bc0c445	2017-09-01 11:51:23 +0000	[diff] [blame]	47
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	48	const Value *Base() const { return GEP ? GEP->getPointerOperand() : nullptr; }
				49
				50	bool operator<(const BCEAtom &O) const {
Clement Courbet	e2e8a5c	2017-10-10 08:00:45 +0000	[diff] [blame]	51	assert(Base() && "invalid atom");
				52	assert(O.Base() && "invalid atom");
				53	// Just ordering by (Base(), Offset) is sufficient. However because this
				54	// means that the ordering will depend on the addresses of the base
				55	// values, which are not reproducible from run to run. To guarantee
				56	// stability, we use the names of the values if they exist; we sort by:
				57	// (Base.getName(), Base(), Offset).
				58	const int NameCmp = Base()->getName().compare(O.Base()->getName());
				59	if (NameCmp == 0) {
				60	if (Base() == O.Base()) {
				61	return Offset.slt(O.Offset);
				62	}
				63	return Base() < O.Base();
				64	}
				65	return NameCmp < 0;
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	66	}
				67
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	68	GetElementPtrInst *GEP;
				69	LoadInst *LoadI;
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	70	APInt Offset;
				71	};
				72
				73	// If this value is a load from a constant offset w.r.t. a base address, and
Xin Tong	256869d	2018-02-28 12:09:53 +0000	[diff] [blame]	74	// there are no other users of the load or address, returns the base address and
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	75	// the offset.
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	76	BCEAtom visitICmpLoadOperand(Value *const Val) {
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	77	BCEAtom Result;
				78	if (auto *const LoadI = dyn_cast<LoadInst>(Val)) {
				79	DEBUG(dbgs() << "load\n");
				80	if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
				81	DEBUG(dbgs() << "used outside of block\n");
				82	return {};
				83	}
				84	if (LoadI->isVolatile()) {
				85	DEBUG(dbgs() << "volatile\n");
				86	return {};
				87	}
				88	Value *const Addr = LoadI->getOperand(0);
				89	if (auto *const GEP = dyn_cast<GetElementPtrInst>(Addr)) {
				90	DEBUG(dbgs() << "GEP\n");
				91	if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
				92	DEBUG(dbgs() << "used outside of block\n");
				93	return {};
				94	}
				95	const auto &DL = GEP->getModule()->getDataLayout();
				96	if (!isDereferenceablePointer(GEP, DL)) {
				97	DEBUG(dbgs() << "not dereferenceable\n");
				98	// We need to make sure that we can do comparison in any order, so we
				99	// require memory to be unconditionnally dereferencable.
				100	return {};
				101	}
				102	Result.Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
				103	if (GEP->accumulateConstantOffset(DL, Result.Offset)) {
				104	Result.GEP = GEP;
				105	Result.LoadI = LoadI;
				106	}
				107	}
				108	}
				109	return Result;
				110	}
				111
				112	// A basic block with a comparison between two BCE atoms.
Xin Tong	0efadbb	2018-04-09 13:14:06 +0000	[diff] [blame^]	113	// The block might do extra work besides the atom comparison, in which case
				114	// doesOtherWork() returns true. Under some conditions, the block can be
				115	// split into the atom comparison part and the "other work" part
				116	// (see canSplit()).
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	117	// Note: the terminology is misleading: the comparison is symmetric, so there
Clement Courbet	e2e8a5c	2017-10-10 08:00:45 +0000	[diff] [blame]	118	// is no real {l/r}hs. What we want though is to have the same base on the
				119	// left (resp. right), so that we can detect consecutive loads. To ensure this
				120	// we put the smallest atom on the left.
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	121	class BCECmpBlock {
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	122	public:
				123	BCECmpBlock() {}
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	124
				125	BCECmpBlock(BCEAtom L, BCEAtom R, int SizeBits)
				126	: Lhs_(L), Rhs_(R), SizeBits_(SizeBits) {
Clement Courbet	98eaa88	2017-10-04 15:13:52 +0000	[diff] [blame]	127	if (Rhs_ < Lhs_) std::swap(Rhs_, Lhs_);
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	128	}
				129
				130	bool IsValid() const {
				131	return Lhs_.Base() != nullptr && Rhs_.Base() != nullptr;
				132	}
				133
Hiroshi Inoue	d24ddcd	2018-01-19 10:55:29 +0000	[diff] [blame]	134	// Assert the block is consistent: If valid, it should also have
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	135	// non-null members besides Lhs_ and Rhs_.
				136	void AssertConsistent() const {
				137	if (IsValid()) {
				138	assert(BB);
				139	assert(CmpI);
				140	assert(BranchI);
				141	}
				142	}
				143
				144	const BCEAtom &Lhs() const { return Lhs_; }
				145	const BCEAtom &Rhs() const { return Rhs_; }
				146	int SizeBits() const { return SizeBits_; }
				147
				148	// Returns true if the block does other works besides comparison.
				149	bool doesOtherWork() const;
				150
Xin Tong	0efadbb	2018-04-09 13:14:06 +0000	[diff] [blame^]	151	// Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp
				152	// instructions in the block.
				153	bool canSplit() const;
				154
				155	// Return true if this all the relevant instructions in the BCE-cmp-block can
				156	// be sunk below this instruction. By doing this, we know we can separate the
				157	// BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the
				158	// block.
				159	bool canSinkBCECmpInst(const Instruction , DenseSet<Instruction > &) const;
				160
				161	// We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block
				162	// instructions. Split the old block and move all non-BCE-cmp-insts into the
				163	// new parent block.
				164	void split(BasicBlock *NewParent) const;
				165
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	166	// The basic block where this comparison happens.
				167	BasicBlock *BB = nullptr;
				168	// The ICMP for this comparison.
				169	ICmpInst *CmpI = nullptr;
				170	// The terminating branch.
				171	BranchInst *BranchI = nullptr;
Xin Tong	0efadbb	2018-04-09 13:14:06 +0000	[diff] [blame^]	172	// The block requires splitting.
				173	bool RequireSplit = false;
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	174
Xin Tong	0efadbb	2018-04-09 13:14:06 +0000	[diff] [blame^]	175	private:
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	176	BCEAtom Lhs_;
				177	BCEAtom Rhs_;
				178	int SizeBits_ = 0;
				179	};
				180
Xin Tong	0efadbb	2018-04-09 13:14:06 +0000	[diff] [blame^]	181	bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
				182	DenseSet<Instruction *> &BlockInsts) const {
				183	// If this instruction has side effects and its in middle of the BCE cmp block
				184	// instructions, then bail for now.
				185	// TODO: use alias analysis to tell whether there is real interference.
				186	if (Inst->mayHaveSideEffects())
				187	return false;
				188	// Make sure this instruction does not use any of the BCE cmp block
				189	// instructions as operand.
				190	for (auto BI : BlockInsts) {
				191	if (is_contained(Inst->operands(), BI))
				192	return false;
				193	}
				194	return true;
				195	}
				196
				197	void BCECmpBlock::split(BasicBlock *NewParent) const {
				198	DenseSet<Instruction *> BlockInsts(
				199	{Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
				200	llvm::SmallVector<Instruction *, 4> OtherInsts;
				201	for (Instruction &Inst : *BB) {
				202	if (BlockInsts.count(&Inst))
				203	continue;
				204	assert(canSinkBCECmpInst(&Inst, BlockInsts) && "Split unsplittable block");
				205	// This is a non-BCE-cmp-block instruction. And it can be separated
				206	// from the BCE-cmp-block instruction.
				207	OtherInsts.push_back(&Inst);
				208	}
				209
				210	// Do the actual spliting.
				211	for (Instruction *Inst : reverse(OtherInsts)) {
				212	Inst->moveBefore(&*NewParent->begin());
				213	}
				214	}
				215
				216	bool BCECmpBlock::canSplit() const {
				217	DenseSet<Instruction *> BlockInsts(
				218	{Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
				219	for (Instruction &Inst : *BB) {
				220	if (!BlockInsts.count(&Inst)) {
				221	if (!canSinkBCECmpInst(&Inst, BlockInsts))
				222	return false;
				223	}
				224	}
				225	return true;
				226	}
				227
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	228	bool BCECmpBlock::doesOtherWork() const {
				229	AssertConsistent();
Xin Tong	8fd561f	2018-03-06 02:24:02 +0000	[diff] [blame]	230	// All the instructions we care about in the BCE cmp block.
				231	DenseSet<Instruction *> BlockInsts(
				232	{Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	233	// TODO(courbet): Can we allow some other things ? This is very conservative.
				234	// We might be able to get away with anything does does not have any side
				235	// effects outside of the basic block.
				236	// Note: The GEPs and/or loads are not necessarily in the same block.
				237	for (const Instruction &Inst : *BB) {
Xin Tong	8fd561f	2018-03-06 02:24:02 +0000	[diff] [blame]	238	if (!BlockInsts.count(&Inst))
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	239	return true;
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	240	}
				241	return false;
				242	}
				243
				244	// Visit the given comparison. If this is a comparison between two valid
				245	// BCE atoms, returns the comparison.
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	246	BCECmpBlock visitICmp(const ICmpInst *const CmpI,
				247	const ICmpInst::Predicate ExpectedPredicate) {
Clement Courbet	9f0b317	2018-03-13 07:05:55 +0000	[diff] [blame]	248	// The comparison can only be used once:
				249	// - For intermediate blocks, as a branch condition.
				250	// - For the final block, as an incoming value for the Phi.
				251	// If there are any other uses of the comparison, we cannot merge it with
				252	// other comparisons as we would create an orphan use of the value.
				253	if (!CmpI->hasOneUse()) {
				254	DEBUG(dbgs() << "cmp has several uses\n");
				255	return {};
				256	}
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	257	if (CmpI->getPredicate() == ExpectedPredicate) {
				258	DEBUG(dbgs() << "cmp "
				259	<< (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
				260	<< "\n");
				261	auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0));
Clement Courbet	98eaa88	2017-10-04 15:13:52 +0000	[diff] [blame]	262	if (!Lhs.Base()) return {};
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	263	auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1));
Clement Courbet	98eaa88	2017-10-04 15:13:52 +0000	[diff] [blame]	264	if (!Rhs.Base()) return {};
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	265	return BCECmpBlock(std::move(Lhs), std::move(Rhs),
				266	CmpI->getOperand(0)->getType()->getScalarSizeInBits());
				267	}
				268	return {};
				269	}
				270
				271	// Visit the given comparison block. If this is a comparison between two valid
				272	// BCE atoms, returns the comparison.
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	273	BCECmpBlock visitCmpBlock(Value const Val, BasicBlock const Block,
				274	const BasicBlock *const PhiBlock) {
Clement Courbet	98eaa88	2017-10-04 15:13:52 +0000	[diff] [blame]	275	if (Block->empty()) return {};
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	276	auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator());
Clement Courbet	98eaa88	2017-10-04 15:13:52 +0000	[diff] [blame]	277	if (!BranchI) return {};
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	278	DEBUG(dbgs() << "branch\n");
				279	if (BranchI->isUnconditional()) {
				280	// In this case, we expect an incoming value which is the result of the
				281	// comparison. This is the last link in the chain of comparisons (note
				282	// that this does not mean that this is the last incoming value, blocks
				283	// can be reordered).
				284	auto *const CmpI = dyn_cast<ICmpInst>(Val);
Clement Courbet	98eaa88	2017-10-04 15:13:52 +0000	[diff] [blame]	285	if (!CmpI) return {};
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	286	DEBUG(dbgs() << "icmp\n");
				287	auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ);
				288	Result.CmpI = CmpI;
				289	Result.BranchI = BranchI;
				290	return Result;
				291	} else {
				292	// In this case, we expect a constant incoming value (the comparison is
				293	// chained).
				294	const auto *const Const = dyn_cast<ConstantInt>(Val);
				295	DEBUG(dbgs() << "const\n");
Clement Courbet	98eaa88	2017-10-04 15:13:52 +0000	[diff] [blame]	296	if (!Const->isZero()) return {};
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	297	DEBUG(dbgs() << "false\n");
				298	auto *const CmpI = dyn_cast<ICmpInst>(BranchI->getCondition());
Clement Courbet	98eaa88	2017-10-04 15:13:52 +0000	[diff] [blame]	299	if (!CmpI) return {};
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	300	DEBUG(dbgs() << "icmp\n");
				301	assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch");
				302	BasicBlock *const FalseBlock = BranchI->getSuccessor(1);
				303	auto Result = visitICmp(
				304	CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE);
				305	Result.CmpI = CmpI;
				306	Result.BranchI = BranchI;
				307	return Result;
				308	}
				309	return {};
				310	}
				311
Xin Tong	0efadbb	2018-04-09 13:14:06 +0000	[diff] [blame^]	312	static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
				313	BCECmpBlock &Comparison) {
				314	DEBUG(dbgs() << "Block '" << Comparison.BB->getName() << "': Found cmp of "
				315	<< Comparison.SizeBits() << " bits between "
				316	<< Comparison.Lhs().Base() << " + " << Comparison.Lhs().Offset
				317	<< " and " << Comparison.Rhs().Base() << " + "
				318	<< Comparison.Rhs().Offset << "\n");
				319	DEBUG(dbgs() << "\n");
				320	Comparisons.push_back(Comparison);
				321	}
				322
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	323	// A chain of comparisons.
				324	class BCECmpChain {
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	325	public:
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	326	BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi);
				327
				328	int size() const { return Comparisons_.size(); }
				329
				330	#ifdef MERGEICMPS_DOT_ON
				331	void dump() const;
				332	#endif // MERGEICMPS_DOT_ON
				333
				334	bool simplify(const TargetLibraryInfo *const TLI);
				335
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	336	private:
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	337	static bool IsContiguous(const BCECmpBlock &First,
				338	const BCECmpBlock &Second) {
				339	return First.Lhs().Base() == Second.Lhs().Base() &&
				340	First.Rhs().Base() == Second.Rhs().Base() &&
				341	First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
				342	First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
				343	}
				344
				345	// Merges the given comparison blocks into one memcmp block and update
				346	// branches. Comparisons are assumed to be continguous. If NextBBInChain is
				347	// null, the merged block will link to the phi block.
Xin Tong	0efadbb	2018-04-09 13:14:06 +0000	[diff] [blame^]	348	void mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
				349	BasicBlock *const NextBBInChain, PHINode &Phi,
				350	const TargetLibraryInfo *const TLI);
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	351
				352	PHINode &Phi_;
				353	std::vector<BCECmpBlock> Comparisons_;
				354	// The original entry block (before sorting);
				355	BasicBlock *EntryBlock_;
				356	};
				357
				358	BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi)
				359	: Phi_(Phi) {
Clement Courbet	c2109c8	2018-02-06 09:14:00 +0000	[diff] [blame]	360	assert(!Blocks.empty() && "a chain should have at least one block");
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	361	// Now look inside blocks to check for BCE comparisons.
				362	std::vector<BCECmpBlock> Comparisons;
Clement Courbet	a7a1746	2018-02-06 12:25:33 +0000	[diff] [blame]	363	for (size_t BlockIdx = 0; BlockIdx < Blocks.size(); ++BlockIdx) {
				364	BasicBlock *const Block = Blocks[BlockIdx];
Clement Courbet	c2109c8	2018-02-06 09:14:00 +0000	[diff] [blame]	365	assert(Block && "invalid block");
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	366	BCECmpBlock Comparison = visitCmpBlock(Phi.getIncomingValueForBlock(Block),
				367	Block, Phi.getParent());
				368	Comparison.BB = Block;
				369	if (!Comparison.IsValid()) {
				370	DEBUG(dbgs() << "skip: not a valid BCECmpBlock\n");
				371	return;
				372	}
				373	if (Comparison.doesOtherWork()) {
Clement Courbet	34be1b0	2018-03-05 08:21:47 +0000	[diff] [blame]	374	DEBUG(dbgs() << "block '" << Comparison.BB->getName()
				375	<< "' does extra work besides compare\n");
Xin Tong	8345c0e	2018-03-05 13:54:47 +0000	[diff] [blame]	376	if (Comparisons.empty()) {
Xin Tong	0efadbb	2018-04-09 13:14:06 +0000	[diff] [blame^]	377	// This is the initial block in the chain, in case this block does other
				378	// work, we can try to split the block and move the irrelevant
				379	// instructions to the predecessor.
				380	//
				381	// If this is not the initial block in the chain, splitting it wont
				382	// work.
				383	//
				384	// As once split, there will still be instructions before the BCE cmp
				385	// instructions that do other work in program order, i.e. within the
				386	// chain before sorting. Unless we can abort the chain at this point
				387	// and start anew.
				388	//
				389	// NOTE: we only handle block with single predecessor for now.
				390	if (Comparison.canSplit()) {
				391	DEBUG(dbgs() << "Split initial block '" << Comparison.BB->getName()
				392	<< "' that does extra work besides compare\n");
				393	Comparison.RequireSplit = true;
				394	enqueueBlock(Comparisons, Comparison);
				395	} else {
				396	DEBUG(dbgs() << "ignoring initial block '" << Comparison.BB->getName()
				397	<< "' that does extra work besides compare\n");
				398	}
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	399	continue;
				400	}
				401	// TODO(courbet): Right now we abort the whole chain. We could be
				402	// merging only the blocks that don't do other work and resume the
				403	// chain from there. For example:
				404	// if (a[0] == b[0]) { // bb1
				405	// if (a[1] == b[1]) { // bb2
				406	// some_value = 3; //bb3
				407	// if (a[2] == b[2]) { //bb3
				408	// do a ton of stuff //bb4
				409	// }
				410	// }
				411	// }
				412	//
				413	// This is:
				414	//
				415	// bb1 --eq--> bb2 --eq--> bb3* -eq--> bb4 --+
				416	// \ \ \ \
				417	// ne ne ne \
				418	// \ \ \ v
				419	// +------------+-----------+----------> bb_phi
				420	//
				421	// We can only merge the first two comparisons, because bb3* does
				422	// "other work" (setting some_value to 3).
				423	// We could still merge bb1 and bb2 though.
				424	return;
				425	}
Xin Tong	0efadbb	2018-04-09 13:14:06 +0000	[diff] [blame^]	426	enqueueBlock(Comparisons, Comparison);
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	427	}
Xin Tong	8345c0e	2018-03-05 13:54:47 +0000	[diff] [blame]	428
				429	// It is possible we have no suitable comparison to merge.
				430	if (Comparisons.empty()) {
				431	DEBUG(dbgs() << "chain with no BCE basic blocks, no merge\n");
				432	return;
				433	}
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	434	EntryBlock_ = Comparisons[0].BB;
				435	Comparisons_ = std::move(Comparisons);
				436	#ifdef MERGEICMPS_DOT_ON
				437	errs() << "BEFORE REORDERING:\n\n";
				438	dump();
				439	#endif // MERGEICMPS_DOT_ON
				440	// Reorder blocks by LHS. We can do that without changing the
				441	// semantics because we are only accessing dereferencable memory.
				442	std::sort(Comparisons_.begin(), Comparisons_.end(),
				443	[](const BCECmpBlock &a, const BCECmpBlock &b) {
				444	return a.Lhs() < b.Lhs();
				445	});
				446	#ifdef MERGEICMPS_DOT_ON
				447	errs() << "AFTER REORDERING:\n\n";
				448	dump();
				449	#endif // MERGEICMPS_DOT_ON
				450	}
				451
				452	#ifdef MERGEICMPS_DOT_ON
				453	void BCECmpChain::dump() const {
				454	errs() << "digraph dag {\n";
				455	errs() << " graph [bgcolor=transparent];\n";
				456	errs() << " node [color=black,style=filled,fillcolor=lightyellow];\n";
				457	errs() << " edge [color=black];\n";
				458	for (size_t I = 0; I < Comparisons_.size(); ++I) {
				459	const auto &Comparison = Comparisons_[I];
				460	errs() << " \"" << I << "\" [label=\"%"
				461	<< Comparison.Lhs().Base()->getName() << " + "
				462	<< Comparison.Lhs().Offset << " == %"
				463	<< Comparison.Rhs().Base()->getName() << " + "
				464	<< Comparison.Rhs().Offset << " (" << (Comparison.SizeBits() / 8)
				465	<< " bytes)\"];\n";
				466	const Value *const Val = Phi_.getIncomingValueForBlock(Comparison.BB);
Clement Courbet	98eaa88	2017-10-04 15:13:52 +0000	[diff] [blame]	467	if (I > 0) errs() << " \"" << (I - 1) << "\" -> \"" << I << "\";\n";
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	468	errs() << " \"" << I << "\" -> \"Phi\" [label=\"" << *Val << "\"];\n";
				469	}
				470	errs() << " \"Phi\" [label=\"Phi\"];\n";
				471	errs() << "}\n\n";
				472	}
				473	#endif // MERGEICMPS_DOT_ON
				474
				475	bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI) {
				476	// First pass to check if there is at least one merge. If not, we don't do
				477	// anything and we keep analysis passes intact.
				478	{
				479	bool AtLeastOneMerged = false;
				480	for (size_t I = 1; I < Comparisons_.size(); ++I) {
				481	if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
				482	AtLeastOneMerged = true;
				483	break;
				484	}
				485	}
Clement Courbet	98eaa88	2017-10-04 15:13:52 +0000	[diff] [blame]	486	if (!AtLeastOneMerged) return false;
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	487	}
				488
				489	// Remove phi references to comparison blocks, they will be rebuilt as we
				490	// merge the blocks.
				491	for (const auto &Comparison : Comparisons_) {
				492	Phi_.removeIncomingValue(Comparison.BB, false);
				493	}
				494
Xin Tong	bdbd97e	2018-03-20 11:57:54 +0000	[diff] [blame]	495	// If entry block is part of the chain, we need to make the first block
				496	// of the chain the new entry block of the function.
				497	BasicBlock *Entry = &Comparisons_[0].BB->getParent()->getEntryBlock();
				498	for (size_t I = 1; I < Comparisons_.size(); ++I) {
				499	if (Entry == Comparisons_[I].BB) {
				500	BasicBlock *NEntryBB = BasicBlock::Create(Entry->getContext(), "",
				501	Entry->getParent(), Entry);
				502	BranchInst::Create(Entry, NEntryBB);
Xin Tong	a713ebe	2018-03-20 12:03:25 +0000	[diff] [blame]	503	break;
Xin Tong	bdbd97e	2018-03-20 11:57:54 +0000	[diff] [blame]	504	}
				505	}
				506
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	507	// Point the predecessors of the chain to the first comparison block (which is
Xin Tong	0efadbb	2018-04-09 13:14:06 +0000	[diff] [blame^]	508	// the new entry point) and update the entry block of the chain.
				509	if (EntryBlock_ != Comparisons_[0].BB) {
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	510	EntryBlock_->replaceAllUsesWith(Comparisons_[0].BB);
Xin Tong	0efadbb	2018-04-09 13:14:06 +0000	[diff] [blame^]	511	EntryBlock_ = Comparisons_[0].BB;
				512	}
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	513
				514	// Effectively merge blocks.
				515	int NumMerged = 1;
				516	for (size_t I = 1; I < Comparisons_.size(); ++I) {
				517	if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
				518	++NumMerged;
				519	} else {
				520	// Merge all previous comparisons and start a new merge block.
				521	mergeComparisons(
				522	makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged),
				523	Comparisons_[I].BB, Phi_, TLI);
				524	NumMerged = 1;
				525	}
				526	}
				527	mergeComparisons(makeArrayRef(Comparisons_)
				528	.slice(Comparisons_.size() - NumMerged, NumMerged),
				529	nullptr, Phi_, TLI);
				530
				531	return true;
				532	}
				533
				534	void BCECmpChain::mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
				535	BasicBlock *const NextBBInChain,
				536	PHINode &Phi,
				537	const TargetLibraryInfo *const TLI) {
				538	assert(!Comparisons.empty());
				539	const auto &FirstComparison = *Comparisons.begin();
				540	BasicBlock *const BB = FirstComparison.BB;
				541	LLVMContext &Context = BB->getContext();
				542
				543	if (Comparisons.size() >= 2) {
Xin Tong	0efadbb	2018-04-09 13:14:06 +0000	[diff] [blame^]	544	// If there is one block that requires splitting, we do it now, i.e.
				545	// just before we know we will collapse the chain. The instructions
				546	// can be executed before any of the instructions in the chain.
				547	auto C = std::find_if(Comparisons.begin(), Comparisons.end(),
				548	[](const BCECmpBlock &B) { return B.RequireSplit; });
				549	if (C != Comparisons.end())
				550	C->split(EntryBlock_);
				551
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	552	DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n");
				553	const auto TotalSize =
				554	std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
				555	[](int Size, const BCECmpBlock &C) {
				556	return Size + C.SizeBits();
				557	}) /
				558	8;
				559
				560	// Incoming edges do not need to be updated, and both GEPs are already
				561	// computing the right address, we just need to:
				562	// - replace the two loads and the icmp with the memcmp
				563	// - update the branch
				564	// - update the incoming values in the phi.
				565	FirstComparison.BranchI->eraseFromParent();
				566	FirstComparison.CmpI->eraseFromParent();
				567	FirstComparison.Lhs().LoadI->eraseFromParent();
				568	FirstComparison.Rhs().LoadI->eraseFromParent();
				569
				570	IRBuilder<> Builder(BB);
				571	const auto &DL = Phi.getModule()->getDataLayout();
Clement Courbet	e2e8a5c	2017-10-10 08:00:45 +0000	[diff] [blame]	572	Value *const MemCmpCall = emitMemCmp(
Xin Tong	0272cb0	2018-03-27 19:43:02 +0000	[diff] [blame]	573	FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP,
				574	ConstantInt::get(DL.getIntPtrType(Context), TotalSize),
Clement Courbet	e2e8a5c	2017-10-10 08:00:45 +0000	[diff] [blame]	575	Builder, DL, TLI);
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	576	Value *const MemCmpIsZero = Builder.CreateICmpEQ(
				577	MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
				578
				579	// Add a branch to the next basic block in the chain.
				580	if (NextBBInChain) {
				581	Builder.CreateCondBr(MemCmpIsZero, NextBBInChain, Phi.getParent());
				582	Phi.addIncoming(ConstantInt::getFalse(Context), BB);
				583	} else {
				584	Builder.CreateBr(Phi.getParent());
				585	Phi.addIncoming(MemCmpIsZero, BB);
				586	}
				587
				588	// Delete merged blocks.
				589	for (size_t I = 1; I < Comparisons.size(); ++I) {
				590	BasicBlock *CBB = Comparisons[I].BB;
				591	CBB->replaceAllUsesWith(BB);
				592	CBB->eraseFromParent();
				593	}
				594	} else {
				595	assert(Comparisons.size() == 1);
				596	// There are no blocks to merge, but we still need to update the branches.
				597	DEBUG(dbgs() << "Only one comparison, updating branches\n");
				598	if (NextBBInChain) {
				599	if (FirstComparison.BranchI->isConditional()) {
				600	DEBUG(dbgs() << "conditional -> conditional\n");
				601	// Just update the "true" target, the "false" target should already be
				602	// the phi block.
				603	assert(FirstComparison.BranchI->getSuccessor(1) == Phi.getParent());
				604	FirstComparison.BranchI->setSuccessor(0, NextBBInChain);
				605	Phi.addIncoming(ConstantInt::getFalse(Context), BB);
				606	} else {
				607	DEBUG(dbgs() << "unconditional -> conditional\n");
				608	// Replace the unconditional branch by a conditional one.
				609	FirstComparison.BranchI->eraseFromParent();
				610	IRBuilder<> Builder(BB);
				611	Builder.CreateCondBr(FirstComparison.CmpI, NextBBInChain,
				612	Phi.getParent());
				613	Phi.addIncoming(FirstComparison.CmpI, BB);
				614	}
				615	} else {
				616	if (FirstComparison.BranchI->isConditional()) {
				617	DEBUG(dbgs() << "conditional -> unconditional\n");
				618	// Replace the conditional branch by an unconditional one.
				619	FirstComparison.BranchI->eraseFromParent();
				620	IRBuilder<> Builder(BB);
				621	Builder.CreateBr(Phi.getParent());
				622	Phi.addIncoming(FirstComparison.CmpI, BB);
				623	} else {
				624	DEBUG(dbgs() << "unconditional -> unconditional\n");
				625	Phi.addIncoming(FirstComparison.CmpI, BB);
				626	}
				627	}
				628	}
				629	}
				630
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	631	std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
				632	BasicBlock *const LastBlock,
				633	int NumBlocks) {
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	634	// Walk up from the last block to find other blocks.
				635	std::vector<BasicBlock *> Blocks(NumBlocks);
Clement Courbet	c2109c8	2018-02-06 09:14:00 +0000	[diff] [blame]	636	assert(LastBlock && "invalid last block");
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	637	BasicBlock *CurBlock = LastBlock;
				638	for (int BlockIndex = NumBlocks - 1; BlockIndex > 0; --BlockIndex) {
				639	if (CurBlock->hasAddressTaken()) {
				640	// Somebody is jumping to the block through an address, all bets are
				641	// off.
				642	DEBUG(dbgs() << "skip: block " << BlockIndex
				643	<< " has its address taken\n");
				644	return {};
				645	}
				646	Blocks[BlockIndex] = CurBlock;
				647	auto *SinglePredecessor = CurBlock->getSinglePredecessor();
				648	if (!SinglePredecessor) {
				649	// The block has two or more predecessors.
				650	DEBUG(dbgs() << "skip: block " << BlockIndex
				651	<< " has two or more predecessors\n");
				652	return {};
				653	}
				654	if (Phi.getBasicBlockIndex(SinglePredecessor) < 0) {
				655	// The block does not link back to the phi.
				656	DEBUG(dbgs() << "skip: block " << BlockIndex
				657	<< " does not link back to the phi\n");
				658	return {};
				659	}
				660	CurBlock = SinglePredecessor;
				661	}
				662	Blocks[0] = CurBlock;
				663	return Blocks;
				664	}
				665
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	666	bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI) {
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	667	DEBUG(dbgs() << "processPhi()\n");
				668	if (Phi.getNumIncomingValues() <= 1) {
				669	DEBUG(dbgs() << "skip: only one incoming value in phi\n");
				670	return false;
				671	}
				672	// We are looking for something that has the following structure:
				673	// bb1 --eq--> bb2 --eq--> bb3 --eq--> bb4 --+
				674	// \ \ \ \
				675	// ne ne ne \
				676	// \ \ \ v
				677	// +------------+-----------+----------> bb_phi
				678	//
				679	// - The last basic block (bb4 here) must branch unconditionally to bb_phi.
				680	// It's the only block that contributes a non-constant value to the Phi.
				681	// - All other blocks (b1, b2, b3) must have exactly two successors, one of
Hiroshi Inoue	d24ddcd	2018-01-19 10:55:29 +0000	[diff] [blame]	682	// them being the phi block.
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	683	// - All intermediate blocks (bb2, bb3) must have only one predecessor.
				684	// - Blocks cannot do other work besides the comparison, see doesOtherWork()
				685
				686	// The blocks are not necessarily ordered in the phi, so we start from the
				687	// last block and reconstruct the order.
				688	BasicBlock *LastBlock = nullptr;
				689	for (unsigned I = 0; I < Phi.getNumIncomingValues(); ++I) {
Clement Courbet	98eaa88	2017-10-04 15:13:52 +0000	[diff] [blame]	690	if (isa<ConstantInt>(Phi.getIncomingValue(I))) continue;
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	691	if (LastBlock) {
				692	// There are several non-constant values.
				693	DEBUG(dbgs() << "skip: several non-constant values\n");
				694	return false;
				695	}
Xin Tong	8ba674e	2018-02-28 12:08:00 +0000	[diff] [blame]	696	if (!isa<ICmpInst>(Phi.getIncomingValue(I)) \|\|
				697	cast<ICmpInst>(Phi.getIncomingValue(I))->getParent() !=
				698	Phi.getIncomingBlock(I)) {
				699	// Non-constant incoming value is not from a cmp instruction or not
				700	// produced by the last block. We could end up processing the value
				701	// producing block more than once.
				702	//
				703	// This is an uncommon case, so we bail.
				704	DEBUG(
				705	dbgs()
				706	<< "skip: non-constant value not from cmp or not from last block.\n");
				707	return false;
				708	}
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	709	LastBlock = Phi.getIncomingBlock(I);
				710	}
				711	if (!LastBlock) {
				712	// There is no non-constant block.
				713	DEBUG(dbgs() << "skip: no non-constant block\n");
				714	return false;
				715	}
				716	if (LastBlock->getSingleSuccessor() != Phi.getParent()) {
				717	DEBUG(dbgs() << "skip: last block non-phi successor\n");
				718	return false;
				719	}
				720
				721	const auto Blocks =
				722	getOrderedBlocks(Phi, LastBlock, Phi.getNumIncomingValues());
Clement Courbet	98eaa88	2017-10-04 15:13:52 +0000	[diff] [blame]	723	if (Blocks.empty()) return false;
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	724	BCECmpChain CmpChain(Blocks, Phi);
				725
				726	if (CmpChain.size() < 2) {
				727	DEBUG(dbgs() << "skip: only one compare block\n");
				728	return false;
				729	}
				730
				731	return CmpChain.simplify(TLI);
				732	}
				733
				734	class MergeICmps : public FunctionPass {
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	735	public:
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	736	static char ID;
				737
				738	MergeICmps() : FunctionPass(ID) {
				739	initializeMergeICmpsPass(*PassRegistry::getPassRegistry());
				740	}
				741
				742	bool runOnFunction(Function &F) override {
				743	if (skipFunction(F)) return false;
				744	const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
Clement Courbet	e2e8a5c	2017-10-10 08:00:45 +0000	[diff] [blame]	745	const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
				746	auto PA = runImpl(F, &TLI, &TTI);
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	747	return !PA.areAllPreserved();
				748	}
				749
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	750	private:
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	751	void getAnalysisUsage(AnalysisUsage &AU) const override {
				752	AU.addRequired<TargetLibraryInfoWrapperPass>();
Clement Courbet	e2e8a5c	2017-10-10 08:00:45 +0000	[diff] [blame]	753	AU.addRequired<TargetTransformInfoWrapperPass>();
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	754	}
				755
Clement Courbet	e2e8a5c	2017-10-10 08:00:45 +0000	[diff] [blame]	756	PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
				757	const TargetTransformInfo *TTI);
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	758	};
				759
Clement Courbet	e2e8a5c	2017-10-10 08:00:45 +0000	[diff] [blame]	760	PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
				761	const TargetTransformInfo *TTI) {
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	762	DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
				763
Clement Courbet	e2e8a5c	2017-10-10 08:00:45 +0000	[diff] [blame]	764	// We only try merging comparisons if the target wants to expand memcmp later.
				765	// The rationale is to avoid turning small chains into memcmp calls.
Clement Courbet	b2c3eb8	2017-10-30 14:19:33 +0000	[diff] [blame]	766	if (!TTI->enableMemCmpExpansion(true)) return PreservedAnalyses::all();
Clement Courbet	e2e8a5c	2017-10-10 08:00:45 +0000	[diff] [blame]	767
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	768	bool MadeChange = false;
				769
				770	for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
				771	// A Phi operation is always first in a basic block.
				772	if (auto const Phi = dyn_cast<PHINode>(&BBIt->begin()))
				773	MadeChange \|= processPhi(*Phi, TLI);
				774	}
				775
Clement Courbet	98eaa88	2017-10-04 15:13:52 +0000	[diff] [blame]	776	if (MadeChange) return PreservedAnalyses::none();
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	777	return PreservedAnalyses::all();
				778	}
				779
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	780	} // namespace
Eugene Zelenko	5adb96c	2017-10-26 00:55:39 +0000	[diff] [blame]	781
Eugene Zelenko	5c2aece	2017-10-26 01:25:14 +0000	[diff] [blame]	782	char MergeICmps::ID = 0;
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	783	INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps",
				784	"Merge contiguous icmps into a memcmp", false, false)
				785	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
Clement Courbet	e2e8a5c	2017-10-10 08:00:45 +0000	[diff] [blame]	786	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
Clement Courbet	65130e2	2017-09-01 10:56:34 +0000	[diff] [blame]	787	INITIALIZE_PASS_END(MergeICmps, "mergeicmps",
				788	"Merge contiguous icmps into a memcmp", false, false)
				789
				790	Pass *llvm::createMergeICmpsPass() { return new MergeICmps(); }