//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass tries to expand memcmp() calls into optimally-sized loads and
// compares for the target.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SizeOpts.h"

using namespace llvm;

#define DEBUG_TYPE "expandmemcmp"

STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
STATISTIC(NumMemCmpGreaterThanMax,
          "Number of memcmp calls with size greater than max size");
STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");

static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock(
    "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
    cl::desc("The number of loads per basic block for inline expansion of "
             "memcmp that is only being compared against zero."));

static cl::opt<unsigned> MaxLoadsPerMemcmp(
    "max-loads-per-memcmp", cl::Hidden,
    cl::desc("Set maximum number of loads used in expanded memcmp"));

static cl::opt<unsigned> MaxLoadsPerMemcmpOptSize(
    "max-loads-per-memcmp-opt-size", cl::Hidden,
    cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));

namespace {

// This class provides helper functions to expand a memcmp library call into an
// inline expansion.
class MemCmpExpansion {
  struct ResultBlock {
    BasicBlock *BB = nullptr;
    PHINode *PhiSrc1 = nullptr;
    PHINode *PhiSrc2 = nullptr;

    ResultBlock() = default;
  };

  CallInst *const CI;
  ResultBlock ResBlock;
  const uint64_t Size;
  unsigned MaxLoadSize;
  uint64_t NumLoadsNonOneByte;
  const uint64_t NumLoadsPerBlockForZeroCmp;
  std::vector<BasicBlock *> LoadCmpBlocks;
  BasicBlock *EndBlock;
  PHINode *PhiRes;
  const bool IsUsedForZeroCmp;
  const DataLayout &DL;
  IRBuilder<> Builder;
  // Represents the decomposition in blocks of the expansion. For example,
  // comparing 33 bytes on X86+SSE can be done with 2x16-byte loads and
  // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}].
  struct LoadEntry {
    LoadEntry(unsigned LoadSize, uint64_t Offset)
        : LoadSize(LoadSize), Offset(Offset) {}

    // The size of the load for this block, in bytes.
    unsigned LoadSize;
    // The offset of this load from the base pointer, in bytes.
    uint64_t Offset;
  };
  using LoadEntryVector = SmallVector<LoadEntry, 8>;
  LoadEntryVector LoadSequence;

  void createLoadCmpBlocks();
  void createResultBlock();
  void setupResultBlockPHINodes();
  void setupEndBlockPHINodes();
  Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
  void emitLoadCompareBlock(unsigned BlockIndex);
  void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
                                         unsigned &LoadIndex);
  void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
  void emitMemCmpResultBlock();
  Value *getMemCmpExpansionZeroCase();
  Value *getMemCmpEqZeroOneBlock();
  Value *getMemCmpOneBlock();
  struct LoadPair {
    Value *Lhs = nullptr;
    Value *Rhs = nullptr;
  };
  LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType,
                       unsigned OffsetBytes);

  static LoadEntryVector
  computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
                            unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
  static LoadEntryVector
  computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
                                 unsigned MaxNumLoads,
                                 unsigned &NumLoadsNonOneByte);

public:
  MemCmpExpansion(CallInst *CI, uint64_t Size,
                  const TargetTransformInfo::MemCmpExpansionOptions &Options,
                  const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout);

  unsigned getNumBlocks();
  uint64_t getNumLoads() const { return LoadSequence.size(); }

  Value *getMemCmpExpansion();
};

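// Computes the load decomposition greedily: use as many loads of the largest
// available size as fit, then move on to the next smaller size. For example
// (illustrative): Size = 15 with LoadSizes = {8, 4, 2, 1} and a large enough
// MaxNumLoads yields {8, 0}, {4, 8}, {2, 12}, {1, 14}, with
// NumLoadsNonOneByte == 3.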
MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
    uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
    const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
  NumLoadsNonOneByte = 0;
  LoadEntryVector LoadSequence;
  uint64_t Offset = 0;
  while (Size && !LoadSizes.empty()) {
    const unsigned LoadSize = LoadSizes.front();
    const uint64_t NumLoadsForThisSize = Size / LoadSize;
    if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
      // Do not expand if the total number of loads is larger than what the
      // target allows. Note that it's important that we exit before completing
      // the expansion to avoid using a ton of memory to store the expansion
      // for large sizes.
      return {};
    }
    if (NumLoadsForThisSize > 0) {
      for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
        LoadSequence.push_back({LoadSize, Offset});
        Offset += LoadSize;
      }
      if (LoadSize > 1)
        ++NumLoadsNonOneByte;
      Size = Size % LoadSize;
    }
    LoadSizes = LoadSizes.drop_front();
  }
  return LoadSequence;
}

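// Computes a load sequence where the final load may overlap the previous one
// rather than falling back to smaller load sizes. For example (illustrative):
// Size = 15 with MaxLoadSize = 8 yields {8, 0} followed by the overlapping
// {8, 7}; byte 7 is simply compared twice, which is fine for memcmp.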
MemCmpExpansion::LoadEntryVector
MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
                                                const unsigned MaxLoadSize,
                                                const unsigned MaxNumLoads,
                                                unsigned &NumLoadsNonOneByte) {
  // These are already handled by the greedy approach.
  if (Size < 2 || MaxLoadSize < 2)
    return {};

  // We try to do as many non-overlapping loads as possible starting from the
  // beginning.
  const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
  assert(NumNonOverlappingLoads && "there must be at least one load");
  // There remain 0 to (MaxLoadSize - 1) bytes to load; this will be done with
  // an overlapping load.
  Size = Size - NumNonOverlappingLoads * MaxLoadSize;
  // Bail if we do not need an overlapping load; this case is already handled
  // by the greedy approach.
  if (Size == 0)
    return {};
  // Bail if the number of loads (non-overlapping + potential overlapping one)
  // is larger than the max allowed.
  if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
    return {};

  // Add non-overlapping loads.
  LoadEntryVector LoadSequence;
  uint64_t Offset = 0;
  for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
    LoadSequence.push_back({MaxLoadSize, Offset});
    Offset += MaxLoadSize;
  }

  // Add the last overlapping load.
  assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
  LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
  NumLoadsNonOneByte = 1;
  return LoadSequence;
}

// Initialize the basic block structure required for expansion of a memcmp
// call with the given maximum load size and memcmp size parameter.
// This structure includes:
// 1. A list of load compare blocks - LoadCmpBlocks.
// 2. An EndBlock, split from the original instruction point, which is the
//    block to return from.
// 3. ResultBlock, the block to branch to for early exit when a LoadCmpBlock
//    finds a difference.
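// For example (illustrative): comparing 15 bytes with MaxLoadSize == 8 and
// Options.AllowOverlappingLoads set picks the two-load overlapping sequence
// {8, 0}, {8, 7} over the four-load greedy one {8, 0}, {4, 8}, {2, 12},
// {1, 14}.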
MemCmpExpansion::MemCmpExpansion(
    CallInst *const CI, uint64_t Size,
    const TargetTransformInfo::MemCmpExpansionOptions &Options,
    const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout)
    : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0),
      NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
      IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) {
  assert(Size > 0 && "zero blocks");
  // Scale the max size down if the target can load more bytes than we need.
  llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
  while (!LoadSizes.empty() && LoadSizes.front() > Size) {
    LoadSizes = LoadSizes.drop_front();
  }
  assert(!LoadSizes.empty() && "cannot load Size bytes");
  MaxLoadSize = LoadSizes.front();
  // Compute the decomposition.
  unsigned GreedyNumLoadsNonOneByte = 0;
  LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
                                           GreedyNumLoadsNonOneByte);
  NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
  assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
  // If we allow overlapping loads and the load sequence is not already optimal,
  // use overlapping loads.
  if (Options.AllowOverlappingLoads &&
      (LoadSequence.empty() || LoadSequence.size() > 2)) {
    unsigned OverlappingNumLoadsNonOneByte = 0;
    auto OverlappingLoads = computeOverlappingLoadSequence(
        Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
    if (!OverlappingLoads.empty() &&
        (LoadSequence.empty() ||
         OverlappingLoads.size() < LoadSequence.size())) {
      LoadSequence = OverlappingLoads;
      NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
    }
  }
  assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
}

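// Returns the number of load/compare basic blocks the expansion needs. For
// the zero-equality case this is a ceiling division; e.g. (illustrative) five
// loads at two loads per block require three blocks.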
unsigned MemCmpExpansion::getNumBlocks() {
  if (IsUsedForZeroCmp)
    return getNumLoads() / NumLoadsPerBlockForZeroCmp +
           (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
  return getNumLoads();
}

void MemCmpExpansion::createLoadCmpBlocks() {
  for (unsigned i = 0; i < getNumBlocks(); i++) {
    BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
                                        EndBlock->getParent(), EndBlock);
    LoadCmpBlocks.push_back(BB);
  }
}

void MemCmpExpansion::createResultBlock() {
  ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
                                   EndBlock->getParent(), EndBlock);
}

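// Loads a pair of values of type LoadSizeType from the two memcmp arguments
// at OffsetBytes, folding a load from a constant source where possible, and
// optionally byte-swaps the values and zero-extends them to CmpSizeType.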
MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
                                                       bool NeedsBSwap,
                                                       Type *CmpSizeType,
                                                       unsigned OffsetBytes) {
  // Get the memory source at offset `OffsetBytes`.
  Value *LhsSource = CI->getArgOperand(0);
  Value *RhsSource = CI->getArgOperand(1);
  Align LhsAlign = LhsSource->getPointerAlignment(DL);
  Align RhsAlign = RhsSource->getPointerAlignment(DL);
  if (OffsetBytes > 0) {
    auto *ByteType = Type::getInt8Ty(CI->getContext());
    LhsSource = Builder.CreateConstGEP1_64(
        ByteType, Builder.CreateBitCast(LhsSource, ByteType->getPointerTo()),
        OffsetBytes);
    RhsSource = Builder.CreateConstGEP1_64(
        ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()),
        OffsetBytes);
    LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
    RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
  }
  LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo());
  RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo());

  // Create a constant or a load from the source.
  Value *Lhs = nullptr;
  if (auto *C = dyn_cast<Constant>(LhsSource))
    Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
  if (!Lhs)
    Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);

  Value *Rhs = nullptr;
  if (auto *C = dyn_cast<Constant>(RhsSource))
    Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
  if (!Rhs)
    Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);

  // Swap bytes if required.
  if (NeedsBSwap) {
    Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
                                                Intrinsic::bswap, LoadSizeType);
    Lhs = Builder.CreateCall(Bswap, Lhs);
    Rhs = Builder.CreateCall(Bswap, Rhs);
  }

  // Zero extend if required.
  if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) {
    Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
    Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
  }
  return {Lhs, Rhs};
}

// This function creates the IR instructions for loading and comparing 1 byte.
// It loads 1 byte from each source of the memcmp parameters at the given
// OffsetBytes. It then subtracts the two loaded values and adds this result to
// the final phi node for selecting the memcmp result.
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
                                               unsigned OffsetBytes) {
  Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
  const LoadPair Loads =
      getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false,
                  Type::getInt32Ty(CI->getContext()), OffsetBytes);
  Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);

  PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);

  if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
    // Early exit branch to EndBlock if a difference was found. Otherwise,
    // continue to the next LoadCmpBlock.
    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
                                    ConstantInt::get(Diff->getType(), 0));
    BranchInst *CmpBr =
        BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
    Builder.Insert(CmpBr);
  } else {
    // The last block has an unconditional branch to EndBlock.
    BranchInst *CmpBr = BranchInst::Create(EndBlock);
    Builder.Insert(CmpBr);
  }
}

/// Generate an equality comparison for one or more pairs of loaded values.
/// This is used in the case where the memcmp() call is compared equal or not
/// equal to zero.
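/// For example (illustrative), with four loads in a block the generated check
/// is ((L0 ^ R0) | (L1 ^ R1)) | ((L2 ^ R2) | (L3 ^ R3)) != 0, so a single
/// branch tests all four pairs.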
Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
                                            unsigned &LoadIndex) {
  assert(LoadIndex < getNumLoads() &&
         "getCompareLoadPairs() called with no remaining loads");
  std::vector<Value *> XorList, OrList;
  Value *Diff = nullptr;

  const unsigned NumLoads =
      std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);

  // For a single-block expansion, start inserting before the memcmp call.
  if (LoadCmpBlocks.empty())
    Builder.SetInsertPoint(CI);
  else
    Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);

  Value *Cmp = nullptr;
  // If we have multiple loads per block, we need to generate a composite
  // comparison using xor+or. The type for the combinations is the largest load
  // type.
  IntegerType *const MaxLoadType =
      NumLoads == 1 ? nullptr
                    : IntegerType::get(CI->getContext(), MaxLoadSize * 8);
  for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
    const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
    const LoadPair Loads = getLoadPair(
        IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8),
        /*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset);

    if (NumLoads != 1) {
      // If we have multiple loads per block, we need to generate a composite
      // comparison using xor+or.
      Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
      Diff = Builder.CreateZExt(Diff, MaxLoadType);
      XorList.push_back(Diff);
    } else {
      // If there's only one load per block, we just compare the loaded values.
      Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
    }
  }

  auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
    std::vector<Value *> OutList;
    for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
      Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
      OutList.push_back(Or);
    }
    if (InList.size() % 2 != 0)
      OutList.push_back(InList.back());
    return OutList;
  };

  if (!Cmp) {
    // Pairwise OR the XOR results.
    OrList = pairWiseOr(XorList);

    // Pairwise OR the OR results until one result left.
    while (OrList.size() != 1) {
      OrList = pairWiseOr(OrList);
    }

    assert(Diff && "Failed to find comparison diff");
    Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
  }

  return Cmp;
}

void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
                                                        unsigned &LoadIndex) {
  Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);

  BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
                           ? EndBlock
                           : LoadCmpBlocks[BlockIndex + 1];
  // Early exit branch to ResultBlock if a difference was found. Otherwise,
  // continue to the next LoadCmpBlock or to EndBlock.
  BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
  Builder.Insert(CmpBr);

  // Add a phi edge for the last LoadCmpBlock to EndBlock with a value of 0
  // since early exit to ResultBlock was not taken (no difference was found in
  // any of the bytes).
  if (BlockIndex == LoadCmpBlocks.size() - 1) {
    Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
    PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
  }
}

// This function creates the IR instructions for loading and comparing using
// the given LoadSize. It loads the number of bytes specified by LoadSize from
// each source of the memcmp parameters. It then does a subtract to see if
// there was a difference in the loaded values. If a difference is found, it
// branches with an early exit to the ResultBlock for calculating which source
// was larger. Otherwise, it falls through to either the next LoadCmpBlock or
// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled
// with a special case through emitLoadCompareByteBlock. The special handling
// simply subtracts the loaded values and adds the result to the phi node.
void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
  // There is one load per block in this case, so BlockIndex == LoadIndex.
  const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];

  if (CurLoadEntry.LoadSize == 1) {
    MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
    return;
  }

  Type *LoadSizeType =
      IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
  Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
  assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");

  Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);

  const LoadPair Loads =
      getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType,
                  CurLoadEntry.Offset);

  // Add the loaded values to the phi nodes for calculating memcmp result only
  // if result is not used in a zero equality.
  if (!IsUsedForZeroCmp) {
    ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
    ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
  }

  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
  BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
                           ? EndBlock
                           : LoadCmpBlocks[BlockIndex + 1];
  // Early exit branch to ResultBlock if a difference was found. Otherwise,
  // continue to the next LoadCmpBlock or to EndBlock.
  BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
  Builder.Insert(CmpBr);

  // Add a phi edge for the last LoadCmpBlock to EndBlock with a value of 0
  // since early exit to ResultBlock was not taken (no difference was found in
  // any of the bytes).
  if (BlockIndex == LoadCmpBlocks.size() - 1) {
    Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
    PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
  }
}

// This function populates the ResultBlock with a sequence to calculate the
// memcmp result. It compares the two loaded source values and returns -1 if
// src1 < src2 and 1 if src1 > src2.
void MemCmpExpansion::emitMemCmpResultBlock() {
  // Special case: if the memcmp result is only used in a zero equality, the
  // result does not need to be calculated and we can simply return 1.
  if (IsUsedForZeroCmp) {
    BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
    Builder.SetInsertPoint(ResBlock.BB, InsertPt);
    Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
    PhiRes->addIncoming(Res, ResBlock.BB);
    BranchInst *NewBr = BranchInst::Create(EndBlock);
    Builder.Insert(NewBr);
    return;
  }
  BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
  Builder.SetInsertPoint(ResBlock.BB, InsertPt);

  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
                                  ResBlock.PhiSrc2);

  Value *Res =
      Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
                           ConstantInt::get(Builder.getInt32Ty(), 1));

  BranchInst *NewBr = BranchInst::Create(EndBlock);
  Builder.Insert(NewBr);
  PhiRes->addIncoming(Res, ResBlock.BB);
}

void MemCmpExpansion::setupResultBlockPHINodes() {
  Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
  Builder.SetInsertPoint(ResBlock.BB);
  // Note: this assumes one load per block.
  ResBlock.PhiSrc1 =
      Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");
  ResBlock.PhiSrc2 =
      Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");
}

void MemCmpExpansion::setupEndBlockPHINodes() {
  Builder.SetInsertPoint(&EndBlock->front());
  PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
}

Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
  unsigned LoadIndex = 0;
  // This loop populates each of the LoadCmpBlocks with the IR sequence to
  // handle multiple loads per block.
  for (unsigned I = 0; I < getNumBlocks(); ++I) {
    emitLoadCompareBlockMultipleLoads(I, LoadIndex);
  }

  emitMemCmpResultBlock();
  return PhiRes;
}

/// A memcmp expansion that compares equality with 0 and only has one block of
/// load and compare can bypass the compare, branch, and phi IR that is
/// required in the general case.
Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
  unsigned LoadIndex = 0;
  Value *Cmp = getCompareLoadPairs(0, LoadIndex);
  assert(LoadIndex == getNumLoads() && "some entries were not consumed");
  return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
}

/// A memcmp expansion that only has one block of load and compare can bypass
/// the compare, branch, and phi IR that is required in the general case.
Value *MemCmpExpansion::getMemCmpOneBlock() {
  Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
  bool NeedsBSwap = DL.isLittleEndian() && Size != 1;

  // The i8 and i16 cases don't need compares. We zext the loaded values and
  // subtract them to get the suitable negative, zero, or positive i32 result.
  if (Size < 4) {
    const LoadPair Loads =
        getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(),
                    /*Offset*/ 0);
    return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
  }

  const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType,
                                     /*Offset*/ 0);
  // The result of memcmp is negative, zero, or positive, so produce that by
  // subtracting 2 extended compare bits: sub (ugt, ult).
  // If a target prefers to use selects to get -1/0/1, they should be able
  // to transform this later. The inverse transform (going from selects to math)
  // may not be possible in the DAG because the selects got converted into
  // branches before we got there.
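  // For example (illustrative): Lhs > Rhs yields 1 - 0 == 1; Lhs == Rhs
  // yields 0 - 0 == 0; Lhs < Rhs yields 0 - 1 == -1.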
  Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
  Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
  Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
  Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
  return Builder.CreateSub(ZextUGT, ZextULT);
}

// This function expands the memcmp call into an inline expansion and returns
// the memcmp result.
Value *MemCmpExpansion::getMemCmpExpansion() {
  // Create the basic block framework for a multi-block expansion.
  if (getNumBlocks() != 1) {
    BasicBlock *StartBlock = CI->getParent();
    EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
    setupEndBlockPHINodes();
    createResultBlock();

    // If the return value of memcmp is not used in a zero equality, we need
    // to calculate which source was larger. The calculation requires the
    // two loaded source values of each load compare block.
    // These will be saved in the phi nodes created by setupResultBlockPHINodes.
    if (!IsUsedForZeroCmp) setupResultBlockPHINodes();

    // Create the number of required load compare basic blocks.
    createLoadCmpBlocks();

    // Update the terminator added by splitBasicBlock to branch to the first
    // LoadCmpBlock.
    StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
  }

  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  if (IsUsedForZeroCmp)
    return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
                               : getMemCmpExpansionZeroCase();

  if (getNumBlocks() == 1)
    return getMemCmpOneBlock();

  for (unsigned I = 0; I < getNumBlocks(); ++I) {
    emitLoadCompareBlock(I);
  }

  emitMemCmpResultBlock();
  return PhiRes;
}

// This function checks to see if an expansion of memcmp can be generated.
// It checks for constant compare size that is less than the max inline size.
// If an expansion cannot occur, returns false to leave as a library call.
// Otherwise, the library call is replaced with a new IR instruction sequence.
/// We want to transform:
/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
/// To:
/// loadbb:
///  %0 = bitcast i32* %buffer2 to i8*
///  %1 = bitcast i32* %buffer1 to i8*
///  %2 = bitcast i8* %1 to i64*
///  %3 = bitcast i8* %0 to i64*
///  %4 = load i64, i64* %2
///  %5 = load i64, i64* %3
///  %6 = call i64 @llvm.bswap.i64(i64 %4)
///  %7 = call i64 @llvm.bswap.i64(i64 %5)
///  %8 = sub i64 %6, %7
///  %9 = icmp ne i64 %8, 0
///  br i1 %9, label %res_block, label %loadbb1
/// res_block:                                        ; preds = %loadbb2,
/// %loadbb1, %loadbb
///  %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
///  %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
///  %10 = icmp ult i64 %phi.src1, %phi.src2
///  %11 = select i1 %10, i32 -1, i32 1
///  br label %endblock
/// loadbb1:                                          ; preds = %loadbb
///  %12 = bitcast i32* %buffer2 to i8*
///  %13 = bitcast i32* %buffer1 to i8*
///  %14 = bitcast i8* %13 to i32*
///  %15 = bitcast i8* %12 to i32*
///  %16 = getelementptr i32, i32* %14, i32 2
///  %17 = getelementptr i32, i32* %15, i32 2
///  %18 = load i32, i32* %16
///  %19 = load i32, i32* %17
///  %20 = call i32 @llvm.bswap.i32(i32 %18)
///  %21 = call i32 @llvm.bswap.i32(i32 %19)
///  %22 = zext i32 %20 to i64
///  %23 = zext i32 %21 to i64
///  %24 = sub i64 %22, %23
///  %25 = icmp ne i64 %24, 0
///  br i1 %25, label %res_block, label %loadbb2
/// loadbb2:                                          ; preds = %loadbb1
///  %26 = bitcast i32* %buffer2 to i8*
///  %27 = bitcast i32* %buffer1 to i8*
///  %28 = bitcast i8* %27 to i16*
///  %29 = bitcast i8* %26 to i16*
///  %30 = getelementptr i16, i16* %28, i16 6
///  %31 = getelementptr i16, i16* %29, i16 6
///  %32 = load i16, i16* %30
///  %33 = load i16, i16* %31
///  %34 = call i16 @llvm.bswap.i16(i16 %32)
///  %35 = call i16 @llvm.bswap.i16(i16 %33)
///  %36 = zext i16 %34 to i64
///  %37 = zext i16 %35 to i64
///  %38 = sub i64 %36, %37
///  %39 = icmp ne i64 %38, 0
///  br i1 %39, label %res_block, label %loadbb3
/// loadbb3:                                          ; preds = %loadbb2
///  %40 = bitcast i32* %buffer2 to i8*
///  %41 = bitcast i32* %buffer1 to i8*
///  %42 = getelementptr i8, i8* %41, i8 14
///  %43 = getelementptr i8, i8* %40, i8 14
///  %44 = load i8, i8* %42
///  %45 = load i8, i8* %43
///  %46 = zext i8 %44 to i32
///  %47 = zext i8 %45 to i32
///  %48 = sub i32 %46, %47
///  br label %endblock
/// endblock:                                         ; preds = %res_block,
/// %loadbb3
///  %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
///  ret i32 %phi.res
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
                         const TargetLowering *TLI, const DataLayout *DL,
                         ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
  NumMemCmpCalls++;

  // Early exit from expansion if -Oz.
  if (CI->getFunction()->hasMinSize())
    return false;

  // Early exit from expansion if size is not a constant.
  ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
  if (!SizeCast) {
    NumMemCmpNotConstant++;
    return false;
  }
  const uint64_t SizeVal = SizeCast->getZExtValue();

  if (SizeVal == 0) {
    return false;
  }
  // TTI call to check if target would like to expand memcmp. Also, get the
  // available load sizes.
  const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
  bool OptForSize = CI->getFunction()->hasOptSize() ||
                    llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
  auto Options = TTI->enableMemCmpExpansion(OptForSize, IsUsedForZeroCmp);
  if (!Options) return false;

  if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
    Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;

  if (OptForSize && MaxLoadsPerMemcmpOptSize.getNumOccurrences())
    Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;

  if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
    Options.MaxNumLoads = MaxLoadsPerMemcmp;

  MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL);

  // Don't expand if this will require more loads than desired by the target.
  if (Expansion.getNumLoads() == 0) {
    NumMemCmpGreaterThanMax++;
    return false;
  }

  NumMemCmpInlined++;

  Value *Res = Expansion.getMemCmpExpansion();

  // Replace call with result of expansion and erase call.
  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();

  return true;
}

class ExpandMemCmpPass : public FunctionPass {
public:
  static char ID;

  ExpandMemCmpPass() : FunctionPass(ID) {
    initializeExpandMemCmpPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F)) return false;

    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC) {
      return false;
    }
    const TargetLowering *TL =
        TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();

    const TargetLibraryInfo *TLI =
        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    const TargetTransformInfo *TTI =
        &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
    auto *BFI = (PSI && PSI->hasProfileSummary())
                    ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
                    : nullptr;
    auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI);
    return !PA.areAllPreserved();
  }

private:
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
    FunctionPass::getAnalysisUsage(AU);
  }

  PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
                            const TargetTransformInfo *TTI,
                            const TargetLowering *TL, ProfileSummaryInfo *PSI,
                            BlockFrequencyInfo *BFI);
  // Returns true if a change was made.
  bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
                  const TargetTransformInfo *TTI, const TargetLowering *TL,
                  const DataLayout &DL, ProfileSummaryInfo *PSI,
                  BlockFrequencyInfo *BFI);
};

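// Expands the first eligible memcmp/bcmp call in BB, if any, and returns true
// if it did. bcmp can be expanded the same way since any valid memcmp result
// is also a valid bcmp result (bcmp only has to return zero or non-zero).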
bool ExpandMemCmpPass::runOnBlock(
    BasicBlock &BB, const TargetLibraryInfo *TLI,
    const TargetTransformInfo *TTI, const TargetLowering *TL,
    const DataLayout &DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
  for (Instruction &I : BB) {
    CallInst *CI = dyn_cast<CallInst>(&I);
    if (!CI) {
      continue;
    }
    LibFunc Func;
    if (TLI->getLibFunc(*CI, Func) &&
        (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
        expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) {
      return true;
    }
  }
  return false;
}

PreservedAnalyses ExpandMemCmpPass::runImpl(
    Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI,
    const TargetLowering *TL, ProfileSummaryInfo *PSI,
    BlockFrequencyInfo *BFI) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  bool MadeChanges = false;
  for (auto BBIt = F.begin(); BBIt != F.end();) {
    if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI)) {
      MadeChanges = true;
      // If changes were made, restart the function from the beginning, since
      // the structure of the function was changed.
      BBIt = F.begin();
    } else {
      ++BBIt;
    }
  }
  if (MadeChanges)
    for (BasicBlock &BB : F)
      SimplifyInstructionsInBlock(&BB);
  return MadeChanges ? PreservedAnalyses::none() : PreservedAnalyses::all();
}

} // namespace

char ExpandMemCmpPass::ID = 0;
INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp",
                      "Expand memcmp() to load/stores", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
                    "Expand memcmp() to load/stores", false, false)

FunctionPass *llvm::createExpandMemCmpPass() {
  return new ExpandMemCmpPass();
}