//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass tries to expand memcmp() calls into optimally-sized loads and
// compares for the target.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SizeOpts.h"

using namespace llvm;

#define DEBUG_TYPE "expandmemcmp"

STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
STATISTIC(NumMemCmpGreaterThanMax,
          "Number of memcmp calls with size greater than max size");
STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");

static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock(
    "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
    cl::desc("The number of loads per basic block for inline expansion of "
             "memcmp that is only being compared against zero."));

static cl::opt<unsigned> MaxLoadsPerMemcmp(
    "max-loads-per-memcmp", cl::Hidden,
    cl::desc("Set maximum number of loads used in expanded memcmp"));

static cl::opt<unsigned> MaxLoadsPerMemcmpOptSize(
    "max-loads-per-memcmp-opt-size", cl::Hidden,
    cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));

namespace {

// This class provides helper functions to expand a memcmp library call into an
// inline expansion.
class MemCmpExpansion {
  struct ResultBlock {
    BasicBlock *BB = nullptr;
    PHINode *PhiSrc1 = nullptr;
    PHINode *PhiSrc2 = nullptr;

    ResultBlock() = default;
  };

  CallInst *const CI;
  ResultBlock ResBlock;
  const uint64_t Size;
  unsigned MaxLoadSize;
  uint64_t NumLoadsNonOneByte;
  const uint64_t NumLoadsPerBlockForZeroCmp;
  std::vector<BasicBlock *> LoadCmpBlocks;
  BasicBlock *EndBlock;
  PHINode *PhiRes;
  const bool IsUsedForZeroCmp;
  const DataLayout &DL;
  IRBuilder<> Builder;
  // Represents the decomposition in blocks of the expansion. For example,
  // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
  // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}].
  struct LoadEntry {
    LoadEntry(unsigned LoadSize, uint64_t Offset)
        : LoadSize(LoadSize), Offset(Offset) {}

    // The size of the load for this block, in bytes.
    unsigned LoadSize;
    // The offset of this load from the base pointer, in bytes.
    uint64_t Offset;
  };
  using LoadEntryVector = SmallVector<LoadEntry, 8>;
  LoadEntryVector LoadSequence;

  void createLoadCmpBlocks();
  void createResultBlock();
  void setupResultBlockPHINodes();
  void setupEndBlockPHINodes();
  Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
  void emitLoadCompareBlock(unsigned BlockIndex);
  void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
                                         unsigned &LoadIndex);
  void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
  void emitMemCmpResultBlock();
  Value *getMemCmpExpansionZeroCase();
  Value *getMemCmpEqZeroOneBlock();
  Value *getMemCmpOneBlock();
  struct LoadPair {
    Value *Lhs = nullptr;
    Value *Rhs = nullptr;
  };
  LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType,
                       unsigned OffsetBytes);

  static LoadEntryVector
  computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
                            unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
  static LoadEntryVector
  computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
                                 unsigned MaxNumLoads,
                                 unsigned &NumLoadsNonOneByte);

public:
  MemCmpExpansion(CallInst *CI, uint64_t Size,
                  const TargetTransformInfo::MemCmpExpansionOptions &Options,
                  const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout);

  unsigned getNumBlocks();
  uint64_t getNumLoads() const { return LoadSequence.size(); }

  Value *getMemCmpExpansion();
};

MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
    uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
    const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
  NumLoadsNonOneByte = 0;
  LoadEntryVector LoadSequence;
  uint64_t Offset = 0;
  while (Size && !LoadSizes.empty()) {
    const unsigned LoadSize = LoadSizes.front();
    const uint64_t NumLoadsForThisSize = Size / LoadSize;
    if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
      // Do not expand if the total number of loads is larger than what the
      // target allows. Note that it's important that we exit before completing
      // the expansion to avoid using a ton of memory to store the expansion for
      // large sizes.
      return {};
    }
    if (NumLoadsForThisSize > 0) {
      for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
        LoadSequence.push_back({LoadSize, Offset});
        Offset += LoadSize;
      }
      if (LoadSize > 1)
        ++NumLoadsNonOneByte;
      Size = Size % LoadSize;
    }
    LoadSizes = LoadSizes.drop_front();
  }
  return LoadSequence;
}
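
// Worked example (the parameter values are illustrative, not computed here):
// for Size == 15 and LoadSizes == {8, 4, 2, 1}, the greedy sequence is
//   [{8, 0}, {4, 8}, {2, 12}, {1, 14}]
// i.e. four loads with NumLoadsNonOneByte == 3. If MaxNumLoads were 3, the
// function would return the empty sequence and the call would stay a libcall.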

MemCmpExpansion::LoadEntryVector
MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
                                                const unsigned MaxLoadSize,
                                                const unsigned MaxNumLoads,
                                                unsigned &NumLoadsNonOneByte) {
  // These are already handled by the greedy approach.
  if (Size < 2 || MaxLoadSize < 2)
    return {};

  // We try to do as many non-overlapping loads as possible starting from the
  // beginning.
  const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
  assert(NumNonOverlappingLoads && "there must be at least one load");
  // There remain 0 to (MaxLoadSize - 1) bytes to load, this will be done with
  // an overlapping load.
  Size = Size - NumNonOverlappingLoads * MaxLoadSize;
  // Bail if we do not need an overlapping load; this case is already handled
  // by the greedy approach.
  if (Size == 0)
    return {};
  // Bail if the number of loads (non-overlapping + potential overlapping one)
  // is larger than the max allowed.
  if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
    return {};

  // Add non-overlapping loads.
  LoadEntryVector LoadSequence;
  uint64_t Offset = 0;
  for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
    LoadSequence.push_back({MaxLoadSize, Offset});
    Offset += MaxLoadSize;
  }

  // Add the last overlapping load.
  assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
  LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
  NumLoadsNonOneByte = 1;
  return LoadSequence;
}
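
// Worked example: for Size == 15 and MaxLoadSize == 8 this returns
// [{8, 0}, {8, 7}]; the second load starts at offset 7 and re-reads byte 7,
// covering all 15 bytes with two loads instead of the four loads the greedy
// decomposition above would need.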

// Initialize the basic block structure required for expansion of memcmp call
// with given maximum load size and memcmp size parameter.
// This structure includes:
// 1. A list of load compare blocks - LoadCmpBlocks.
// 2. An EndBlock, split from original instruction point, which is the block to
//    return from.
// 3. ResultBlock, block to branch to for early exit when a
//    LoadCmpBlock finds a difference.
MemCmpExpansion::MemCmpExpansion(
    CallInst *const CI, uint64_t Size,
    const TargetTransformInfo::MemCmpExpansionOptions &Options,
    const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout)
    : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0),
      NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
      IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) {
  assert(Size > 0 && "zero blocks");
  // Scale the max size down if the target can load more bytes than we need.
  llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
  while (!LoadSizes.empty() && LoadSizes.front() > Size) {
    LoadSizes = LoadSizes.drop_front();
  }
  assert(!LoadSizes.empty() && "cannot load Size bytes");
  MaxLoadSize = LoadSizes.front();
  // Compute the decomposition.
  unsigned GreedyNumLoadsNonOneByte = 0;
  LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
                                           GreedyNumLoadsNonOneByte);
  NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
  assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
  // If we allow overlapping loads and the load sequence is not already optimal,
  // use overlapping loads.
  if (Options.AllowOverlappingLoads &&
      (LoadSequence.empty() || LoadSequence.size() > 2)) {
    unsigned OverlappingNumLoadsNonOneByte = 0;
    auto OverlappingLoads = computeOverlappingLoadSequence(
        Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
    if (!OverlappingLoads.empty() &&
        (LoadSequence.empty() ||
         OverlappingLoads.size() < LoadSequence.size())) {
      LoadSequence = OverlappingLoads;
      NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
    }
  }
  assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
}

unsigned MemCmpExpansion::getNumBlocks() {
  if (IsUsedForZeroCmp)
    return getNumLoads() / NumLoadsPerBlockForZeroCmp +
           (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
  return getNumLoads();
}
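
// Note: for the zero-equality case this is a ceiling division; e.g. 5 loads
// with NumLoadsPerBlockForZeroCmp == 2 require 3 blocks. Otherwise there is
// exactly one block per load entry.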

void MemCmpExpansion::createLoadCmpBlocks() {
  for (unsigned i = 0; i < getNumBlocks(); i++) {
    BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
                                        EndBlock->getParent(), EndBlock);
    LoadCmpBlocks.push_back(BB);
  }
}

void MemCmpExpansion::createResultBlock() {
  ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
                                   EndBlock->getParent(), EndBlock);
}

MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
                                                       bool NeedsBSwap,
                                                       Type *CmpSizeType,
                                                       unsigned OffsetBytes) {
  // Get the memory source at offset `OffsetBytes`.
  Value *LhsSource = CI->getArgOperand(0);
  Value *RhsSource = CI->getArgOperand(1);
  if (OffsetBytes > 0) {
    auto *ByteType = Type::getInt8Ty(CI->getContext());
    LhsSource = Builder.CreateConstGEP1_64(
        ByteType, Builder.CreateBitCast(LhsSource, ByteType->getPointerTo()),
        OffsetBytes);
    RhsSource = Builder.CreateConstGEP1_64(
        ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()),
        OffsetBytes);
  }
  LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo());
  RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo());

  // Create a constant or a load from the source.
  Value *Lhs = nullptr;
  if (auto *C = dyn_cast<Constant>(LhsSource))
    Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
  if (!Lhs)
    Lhs = Builder.CreateLoad(LoadSizeType, LhsSource);

  Value *Rhs = nullptr;
  if (auto *C = dyn_cast<Constant>(RhsSource))
    Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
  if (!Rhs)
    Rhs = Builder.CreateLoad(LoadSizeType, RhsSource);

  // Swap bytes if required.
  if (NeedsBSwap) {
    Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
                                                Intrinsic::bswap, LoadSizeType);
    Lhs = Builder.CreateCall(Bswap, Lhs);
    Rhs = Builder.CreateCall(Bswap, Rhs);
  }

  // Zero extend if required.
  if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) {
    Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
    Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
  }
  return {Lhs, Rhs};
}
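
// Illustrative sketch of the IR this produces (value names are invented for
// readability; the builder emits numbered temporaries): for
// LoadSizeType == i32, NeedsBSwap == true, CmpSizeType == i64 and
// OffsetBytes == 4, each side becomes roughly:
//   %gep  = getelementptr i8, i8* %src, i64 4
//   %ptr  = bitcast i8* %gep to i32*
//   %val  = load i32, i32* %ptr
//   %swap = call i32 @llvm.bswap.i32(i32 %val)
//   %ext  = zext i32 %swap to i64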

// This function creates the IR instructions for loading and comparing 1 byte.
// It loads 1 byte from each source of the memcmp parameters at the given
// offset. It then subtracts the two loaded values and adds this result to the
// final phi node for selecting the memcmp result.
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
                                               unsigned OffsetBytes) {
  Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
  const LoadPair Loads =
      getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false,
                  Type::getInt32Ty(CI->getContext()), OffsetBytes);
  Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);

  PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);

  if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
    // Branch to EndBlock for an early exit if a difference is found;
    // otherwise, continue to the next LoadCmpBlock.
    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
                                    ConstantInt::get(Diff->getType(), 0));
    BranchInst *CmpBr =
        BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
    Builder.Insert(CmpBr);
  } else {
    // The last block has an unconditional branch to EndBlock.
    BranchInst *CmpBr = BranchInst::Create(EndBlock);
    Builder.Insert(CmpBr);
  }
}

/// Generate an equality comparison for one or more pairs of loaded values.
/// This is used in the case where the memcmp() call is compared equal or not
/// equal to zero.
Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
                                            unsigned &LoadIndex) {
  assert(LoadIndex < getNumLoads() &&
         "getCompareLoadPairs() called with no remaining loads");
  std::vector<Value *> XorList, OrList;
  Value *Diff = nullptr;

  const unsigned NumLoads =
      std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);

  // For a single-block expansion, start inserting before the memcmp call.
  if (LoadCmpBlocks.empty())
    Builder.SetInsertPoint(CI);
  else
    Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);

  Value *Cmp = nullptr;
  // If we have multiple loads per block, we need to generate a composite
  // comparison using xor+or. The type for the combinations is the largest load
  // type.
  IntegerType *const MaxLoadType =
      NumLoads == 1 ? nullptr
                    : IntegerType::get(CI->getContext(), MaxLoadSize * 8);
  for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
    const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
    const LoadPair Loads = getLoadPair(
        IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8),
        /*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset);

    if (NumLoads != 1) {
      // If we have multiple loads per block, we need to generate a composite
      // comparison using xor+or.
      Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
      Diff = Builder.CreateZExt(Diff, MaxLoadType);
      XorList.push_back(Diff);
    } else {
      // If there's only one load per block, we just compare the loaded values.
      Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
    }
  }

  auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
    std::vector<Value *> OutList;
    for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
      Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
      OutList.push_back(Or);
    }
    if (InList.size() % 2 != 0)
      OutList.push_back(InList.back());
    return OutList;
  };

  if (!Cmp) {
    // Pairwise OR the XOR results.
    OrList = pairWiseOr(XorList);

    // Pairwise OR the OR results until one result left.
    while (OrList.size() != 1) {
      OrList = pairWiseOr(OrList);
    }

    assert(Diff && "Failed to find comparison diff");
    Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
  }

  return Cmp;
}
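
// Shape of the composite compare for three loads per block (value names are
// invented for readability; iN is the max load type):
//   %x0  = xor iN %lhs0, %rhs0
//   %x1  = xor iN %lhs1, %rhs1
//   %x2  = xor iN %lhs2, %rhs2
//   %o0  = or iN %x0, %x1
//   %o1  = or iN %o0, %x2
//   %cmp = icmp ne iN %o1, 0              ; true iff any pair differs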

void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
                                                        unsigned &LoadIndex) {
  Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);

  BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
                           ? EndBlock
                           : LoadCmpBlocks[BlockIndex + 1];
  // Branch to ResultBlock for an early exit if a difference is found;
  // otherwise, continue to the next LoadCmpBlock or to EndBlock.
  BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
  Builder.Insert(CmpBr);

  // Add a phi edge for the last LoadCmpBlock to EndBlock with a value of 0
  // since early exit to ResultBlock was not taken (no difference was found in
  // any of the bytes).
  if (BlockIndex == LoadCmpBlocks.size() - 1) {
    Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
    PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
  }
}

// This function creates the IR instructions for loading and comparing using
// the given LoadSize. It loads the number of bytes specified by LoadSize from
// each source of the memcmp parameters. It then does a subtract to see if
// there was a difference in the loaded values. If a difference is found, it
// branches with an early exit to the ResultBlock for calculating which source
// was larger. Otherwise, it falls through to either the next LoadCmpBlock or
// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled
// with a special case through emitLoadCompareByteBlock. The special handling
// can simply subtract the loaded values and add it to the result phi node.
void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
  // There is one load per block in this case, BlockIndex == LoadIndex.
  const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];

  if (CurLoadEntry.LoadSize == 1) {
    MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
    return;
  }

  Type *LoadSizeType =
      IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
  Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
  assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");

  Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);

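  // memcmp() compares the buffers byte by byte, which corresponds to a
  // big-endian integer comparison, so on little-endian targets the loaded
  // values must be byte-swapped for the integer compare to give the right
  // ordering.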
  const LoadPair Loads =
      getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType,
                  CurLoadEntry.Offset);

  // Add the loaded values to the phi nodes for calculating memcmp result only
  // if result is not used in a zero equality.
  if (!IsUsedForZeroCmp) {
    ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
    ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
  }

  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
  BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
                           ? EndBlock
                           : LoadCmpBlocks[BlockIndex + 1];
  // Branch to ResultBlock for an early exit if a difference is found;
  // otherwise, continue to the next LoadCmpBlock or to EndBlock.
  BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
  Builder.Insert(CmpBr);

  // Add a phi edge for the last LoadCmpBlock to EndBlock with a value of 0
  // since early exit to ResultBlock was not taken (no difference was found in
  // any of the bytes).
  if (BlockIndex == LoadCmpBlocks.size() - 1) {
    Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
    PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
  }
}

// This function populates the ResultBlock with a sequence to calculate the
// memcmp result. It compares the two loaded source values and returns -1 if
// src1 < src2 and 1 if src1 > src2.
void MemCmpExpansion::emitMemCmpResultBlock() {
  // Special case: if memcmp result is used in a zero equality, result does not
  // need to be calculated and can simply return 1.
  if (IsUsedForZeroCmp) {
    BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
    Builder.SetInsertPoint(ResBlock.BB, InsertPt);
    Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
    PhiRes->addIncoming(Res, ResBlock.BB);
    BranchInst *NewBr = BranchInst::Create(EndBlock);
    Builder.Insert(NewBr);
    return;
  }
  BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
  Builder.SetInsertPoint(ResBlock.BB, InsertPt);

  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
                                  ResBlock.PhiSrc2);

  Value *Res =
      Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
                           ConstantInt::get(Builder.getInt32Ty(), 1));

  BranchInst *NewBr = BranchInst::Create(EndBlock);
  Builder.Insert(NewBr);
  PhiRes->addIncoming(Res, ResBlock.BB);
}

void MemCmpExpansion::setupResultBlockPHINodes() {
  Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
  Builder.SetInsertPoint(ResBlock.BB);
  // Note: this assumes one load per block.
  ResBlock.PhiSrc1 =
      Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");
  ResBlock.PhiSrc2 =
      Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");
}

void MemCmpExpansion::setupEndBlockPHINodes() {
  Builder.SetInsertPoint(&EndBlock->front());
  PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
}

Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
  unsigned LoadIndex = 0;
  // This loop populates each of the LoadCmpBlocks with the IR sequence to
  // handle multiple loads per block.
  for (unsigned I = 0; I < getNumBlocks(); ++I) {
    emitLoadCompareBlockMultipleLoads(I, LoadIndex);
  }

  emitMemCmpResultBlock();
  return PhiRes;
}

/// A memcmp expansion that compares equality with 0 and only has one block of
/// load and compare can bypass the compare, branch, and phi IR that is required
/// in the general case.
Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
  unsigned LoadIndex = 0;
  Value *Cmp = getCompareLoadPairs(0, LoadIndex);
  assert(LoadIndex == getNumLoads() && "some entries were not consumed");
  return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
}

/// A memcmp expansion that only has one block of load and compare can bypass
/// the compare, branch, and phi IR that is required in the general case.
Value *MemCmpExpansion::getMemCmpOneBlock() {
  Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
  bool NeedsBSwap = DL.isLittleEndian() && Size != 1;

  // The i8 and i16 cases don't need compares. We zext the loaded values and
  // subtract them to get the suitable negative, zero, or positive i32 result.
  if (Size < 4) {
    const LoadPair Loads =
        getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(),
                    /*Offset*/ 0);
    return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
  }

  const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType,
                                     /*Offset*/ 0);
  // The result of memcmp is negative, zero, or positive, so produce that by
  // subtracting 2 extended compare bits: sub (ugt, ult).
  // If a target prefers to use selects to get -1/0/1, they should be able
  // to transform this later. The inverse transform (going from selects to math)
  // may not be possible in the DAG because the selects got converted into
  // branches before we got there.
  Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
  Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
  Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
  Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
  return Builder.CreateSub(ZextUGT, ZextULT);
}
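
// Worked example of the sub(ugt, ult) trick above: for Lhs == 5 and Rhs == 9,
// ugt == 0 and ult == 1, giving 0 - 1 == -1; equal values give 0 - 0 == 0;
// Lhs > Rhs gives 1 - 0 == 1, matching memcmp()'s negative/zero/positive
// contract.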

// This function expands the memcmp call into an inline expansion and returns
// the memcmp result.
Value *MemCmpExpansion::getMemCmpExpansion() {
  // Create the basic block framework for a multi-block expansion.
  if (getNumBlocks() != 1) {
    BasicBlock *StartBlock = CI->getParent();
    EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
    setupEndBlockPHINodes();
    createResultBlock();

    // If return value of memcmp is not used in a zero equality, we need to
    // calculate which source was larger. The calculation requires the
    // two loaded source values of each load compare block.
    // These will be saved in the phi nodes created by setupResultBlockPHINodes.
    if (!IsUsedForZeroCmp) setupResultBlockPHINodes();

    // Create the number of required load compare basic blocks.
    createLoadCmpBlocks();

    // Update the terminator added by splitBasicBlock to branch to the first
    // LoadCmpBlock.
    StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
  }

  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  if (IsUsedForZeroCmp)
    return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
                               : getMemCmpExpansionZeroCase();

  if (getNumBlocks() == 1)
    return getMemCmpOneBlock();

  for (unsigned I = 0; I < getNumBlocks(); ++I) {
    emitLoadCompareBlock(I);
  }

  emitMemCmpResultBlock();
  return PhiRes;
}

// This function checks to see if an expansion of memcmp can be generated.
// It checks for constant compare size that is less than the max inline size.
// If an expansion cannot occur, returns false to leave as a library call.
// Otherwise, the library call is replaced with a new IR instruction sequence.
/// We want to transform:
/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
/// To:
/// loadbb:
///  %0 = bitcast i32* %buffer2 to i8*
///  %1 = bitcast i32* %buffer1 to i8*
///  %2 = bitcast i8* %1 to i64*
///  %3 = bitcast i8* %0 to i64*
///  %4 = load i64, i64* %2
///  %5 = load i64, i64* %3
///  %6 = call i64 @llvm.bswap.i64(i64 %4)
///  %7 = call i64 @llvm.bswap.i64(i64 %5)
///  %8 = sub i64 %6, %7
///  %9 = icmp ne i64 %8, 0
///  br i1 %9, label %res_block, label %loadbb1
/// res_block:                                     ; preds = %loadbb2,
/// %loadbb1, %loadbb
///  %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
///  %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
///  %10 = icmp ult i64 %phi.src1, %phi.src2
///  %11 = select i1 %10, i32 -1, i32 1
///  br label %endblock
/// loadbb1:                                       ; preds = %loadbb
///  %12 = bitcast i32* %buffer2 to i8*
///  %13 = bitcast i32* %buffer1 to i8*
///  %14 = bitcast i8* %13 to i32*
///  %15 = bitcast i8* %12 to i32*
///  %16 = getelementptr i32, i32* %14, i32 2
///  %17 = getelementptr i32, i32* %15, i32 2
///  %18 = load i32, i32* %16
///  %19 = load i32, i32* %17
///  %20 = call i32 @llvm.bswap.i32(i32 %18)
///  %21 = call i32 @llvm.bswap.i32(i32 %19)
///  %22 = zext i32 %20 to i64
///  %23 = zext i32 %21 to i64
///  %24 = sub i64 %22, %23
///  %25 = icmp ne i64 %24, 0
///  br i1 %25, label %res_block, label %loadbb2
/// loadbb2:                                       ; preds = %loadbb1
///  %26 = bitcast i32* %buffer2 to i8*
///  %27 = bitcast i32* %buffer1 to i8*
///  %28 = bitcast i8* %27 to i16*
///  %29 = bitcast i8* %26 to i16*
///  %30 = getelementptr i16, i16* %28, i16 6
///  %31 = getelementptr i16, i16* %29, i16 6
///  %32 = load i16, i16* %30
///  %33 = load i16, i16* %31
///  %34 = call i16 @llvm.bswap.i16(i16 %32)
///  %35 = call i16 @llvm.bswap.i16(i16 %33)
///  %36 = zext i16 %34 to i64
///  %37 = zext i16 %35 to i64
///  %38 = sub i64 %36, %37
///  %39 = icmp ne i64 %38, 0
///  br i1 %39, label %res_block, label %loadbb3
/// loadbb3:                                       ; preds = %loadbb2
///  %40 = bitcast i32* %buffer2 to i8*
///  %41 = bitcast i32* %buffer1 to i8*
///  %42 = getelementptr i8, i8* %41, i8 14
///  %43 = getelementptr i8, i8* %40, i8 14
///  %44 = load i8, i8* %42
///  %45 = load i8, i8* %43
///  %46 = zext i8 %44 to i32
///  %47 = zext i8 %45 to i32
///  %48 = sub i32 %46, %47
///  br label %endblock
/// endblock:                                      ; preds = %res_block,
/// %loadbb3
///  %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
///  ret i32 %phi.res
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
                         const TargetLowering *TLI, const DataLayout *DL,
                         ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
  NumMemCmpCalls++;

  // Early exit from expansion if -Oz.
  if (CI->getFunction()->hasMinSize())
    return false;

  // Early exit from expansion if size is not a constant.
  ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
  if (!SizeCast) {
    NumMemCmpNotConstant++;
    return false;
  }
  const uint64_t SizeVal = SizeCast->getZExtValue();

  if (SizeVal == 0) {
    return false;
  }
  // TTI call to check if target would like to expand memcmp. Also, get the
  // available load sizes.
  const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
  bool OptForSize = CI->getFunction()->hasOptSize() ||
                    llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
  auto Options = TTI->enableMemCmpExpansion(OptForSize, IsUsedForZeroCmp);
  if (!Options) return false;

  if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
    Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;

  if (OptForSize && MaxLoadsPerMemcmpOptSize.getNumOccurrences())
    Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;

  if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
    Options.MaxNumLoads = MaxLoadsPerMemcmp;

  MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL);

  // Don't expand if this will require more loads than desired by the target.
  if (Expansion.getNumLoads() == 0) {
    NumMemCmpGreaterThanMax++;
    return false;
  }

  NumMemCmpInlined++;

  Value *Res = Expansion.getMemCmpExpansion();

  // Replace call with result of expansion and erase call.
  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();

  return true;
}
class ExpandMemCmpPass : public FunctionPass {
public:
  static char ID;

  ExpandMemCmpPass() : FunctionPass(ID) {
    initializeExpandMemCmpPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F)) return false;

    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC) {
      return false;
    }
    const TargetLowering* TL =
        TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();

    const TargetLibraryInfo *TLI =
        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    const TargetTransformInfo *TTI =
        &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
    auto *BFI = (PSI && PSI->hasProfileSummary()) ?
        &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
        nullptr;
    auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI);
    return !PA.areAllPreserved();
  }

private:
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
    FunctionPass::getAnalysisUsage(AU);
  }

  PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
                            const TargetTransformInfo *TTI,
                            const TargetLowering* TL,
                            ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI);
  // Returns true if a change was made.
  bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
                  const TargetTransformInfo *TTI, const TargetLowering* TL,
                  const DataLayout& DL, ProfileSummaryInfo *PSI,
                  BlockFrequencyInfo *BFI);
};

bool ExpandMemCmpPass::runOnBlock(
    BasicBlock &BB, const TargetLibraryInfo *TLI,
    const TargetTransformInfo *TTI, const TargetLowering* TL,
    const DataLayout& DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
  for (Instruction& I : BB) {
    CallInst *CI = dyn_cast<CallInst>(&I);
    if (!CI) {
      continue;
    }
    LibFunc Func;
    if (TLI->getLibFunc(ImmutableCallSite(CI), Func) &&
        (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
        expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) {
      return true;
    }
  }
  return false;
}

PreservedAnalyses ExpandMemCmpPass::runImpl(
    Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI,
    const TargetLowering* TL, ProfileSummaryInfo *PSI,
    BlockFrequencyInfo *BFI) {
  const DataLayout& DL = F.getParent()->getDataLayout();
  bool MadeChanges = false;
  for (auto BBIt = F.begin(); BBIt != F.end();) {
    if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI)) {
      MadeChanges = true;
      // If changes were made, restart the function from the beginning, since
      // the structure of the function was changed.
      BBIt = F.begin();
    } else {
      ++BBIt;
    }
  }
  if (MadeChanges)
    for (BasicBlock &BB : F)
      SimplifyInstructionsInBlock(&BB);
  return MadeChanges ? PreservedAnalyses::none() : PreservedAnalyses::all();
}

} // namespace

char ExpandMemCmpPass::ID = 0;
INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp",
                      "Expand memcmp() to load/stores", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
                    "Expand memcmp() to load/stores", false, false)

FunctionPass *llvm::createExpandMemCmpPass() {
  return new ExpandMemCmpPass();
}