blob: b4037499d7d1e4f0e44f31d61749e474ddf26ac4 [file] [log] [blame]
Eugene Zelenkofa57bd02017-09-27 23:26:01 +00001//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
// intrinsics
Ayman Musac5490e52017-05-15 11:30:54 +00003//
Chandler Carruth2946cd72019-01-19 08:50:56 +00004// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Ayman Musac5490e52017-05-15 11:30:54 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This pass replaces masked memory intrinsics - when unsupported by the target
11// - with a chain of basic blocks, that deal with the elements one-by-one if the
12// appropriate mask bit is set.
13//
14//===----------------------------------------------------------------------===//
15
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000016#include "llvm/ADT/Twine.h"
Ayman Musac5490e52017-05-15 11:30:54 +000017#include "llvm/Analysis/TargetTransformInfo.h"
David Blaikieb3bde2e2017-11-17 01:07:10 +000018#include "llvm/CodeGen/TargetSubtargetInfo.h"
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000019#include "llvm/IR/BasicBlock.h"
20#include "llvm/IR/Constant.h"
21#include "llvm/IR/Constants.h"
22#include "llvm/IR/DerivedTypes.h"
23#include "llvm/IR/Function.h"
Ayman Musac5490e52017-05-15 11:30:54 +000024#include "llvm/IR/IRBuilder.h"
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000025#include "llvm/IR/InstrTypes.h"
26#include "llvm/IR/Instruction.h"
27#include "llvm/IR/Instructions.h"
Reid Kleckner0e8c4bb2017-09-07 23:27:44 +000028#include "llvm/IR/IntrinsicInst.h"
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000029#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/Type.h"
31#include "llvm/IR/Value.h"
32#include "llvm/Pass.h"
33#include "llvm/Support/Casting.h"
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000034#include <algorithm>
35#include <cassert>
Ayman Musac5490e52017-05-15 11:30:54 +000036
37using namespace llvm;
38
39#define DEBUG_TYPE "scalarize-masked-mem-intrin"
40
namespace {

// Legacy-PM function pass that replaces masked load/store/gather/scatter
// intrinsics the target cannot lower directly with an equivalent chain of
// basic blocks that handles the vector elements one at a time (see the
// scalarizeMasked* helpers below).
class ScalarizeMaskedMemIntrin : public FunctionPass {
  // Target query interface used to decide which intrinsics are legal.
  // Non-owning; presumably filled in by runOnFunction (defined later in
  // this file) — confirm there.
  const TargetTransformInfo *TTI = nullptr;

public:
  static char ID; // Pass identification, replacement for typeid

  explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID) {
    initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override {
    return "Scalarize Masked Memory Intrinsics";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Needs TTI to ask the target whether each masked intrinsic is legal.
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

private:
  // Process one basic block / one call site. ModifiedDT is set to true by
  // the callees when new control flow is introduced (dominator tree is
  // invalidated).
  bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
  bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
};

} // end anonymous namespace
Ayman Musac5490e52017-05-15 11:30:54 +000069
70char ScalarizeMaskedMemIntrin::ID = 0;
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000071
Matthias Braun1527baa2017-05-25 21:26:32 +000072INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE,
73 "Scalarize unsupported masked memory intrinsics", false, false)
Ayman Musac5490e52017-05-15 11:30:54 +000074
75FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
76 return new ScalarizeMaskedMemIntrin();
77}
78
Craig Topper8b4f0e12018-09-27 22:31:42 +000079static bool isConstantIntVector(Value *Mask) {
80 Constant *C = dyn_cast<Constant>(Mask);
81 if (!C)
82 return false;
83
84 unsigned NumElts = Mask->getType()->getVectorNumElements();
85 for (unsigned i = 0; i != NumElts; ++i) {
86 Constant *CElt = C->getAggregateElement(i);
87 if (!CElt || !isa<ConstantInt>(CElt))
88 return false;
89 }
90
91 return true;
92}
93
Ayman Musac5490e52017-05-15 11:30:54 +000094// Translate a masked load intrinsic like
95// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
96// <16 x i1> %mask, <16 x i32> %passthru)
97// to a chain of basic blocks, with loading element one-by-one if
98// the appropriate mask bit is set
99//
100// %1 = bitcast i8* %addr to i32*
101// %2 = extractelement <16 x i1> %mask, i32 0
Craig Topper49dad8b2018-09-27 21:28:39 +0000102// br i1 %2, label %cond.load, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000103//
104// cond.load: ; preds = %0
Craig Topper49dad8b2018-09-27 21:28:39 +0000105// %3 = getelementptr i32* %1, i32 0
106// %4 = load i32* %3
Craig Topper7d234d62018-09-27 21:28:52 +0000107// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
Ayman Musac5490e52017-05-15 11:30:54 +0000108// br label %else
109//
110// else: ; preds = %0, %cond.load
Craig Topper49dad8b2018-09-27 21:28:39 +0000111// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
112// %6 = extractelement <16 x i1> %mask, i32 1
113// br i1 %6, label %cond.load1, label %else2
Ayman Musac5490e52017-05-15 11:30:54 +0000114//
115// cond.load1: ; preds = %else
Craig Topper49dad8b2018-09-27 21:28:39 +0000116// %7 = getelementptr i32* %1, i32 1
117// %8 = load i32* %7
118// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
Ayman Musac5490e52017-05-15 11:30:54 +0000119// br label %else2
120//
121// else2: ; preds = %else, %cond.load1
Craig Topper49dad8b2018-09-27 21:28:39 +0000122// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
123// %10 = extractelement <16 x i1> %mask, i32 2
124// br i1 %10, label %cond.load4, label %else5
Ayman Musac5490e52017-05-15 11:30:54 +0000125//
Craig Topperd84f6052019-03-08 23:03:43 +0000126static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
Ayman Musac5490e52017-05-15 11:30:54 +0000127 Value *Ptr = CI->getArgOperand(0);
128 Value *Alignment = CI->getArgOperand(1);
129 Value *Mask = CI->getArgOperand(2);
130 Value *Src0 = CI->getArgOperand(3);
131
132 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
Craig Topper10ec0212018-09-27 22:31:40 +0000133 VectorType *VecType = cast<VectorType>(CI->getType());
Ayman Musac5490e52017-05-15 11:30:54 +0000134
Craig Topper10ec0212018-09-27 22:31:40 +0000135 Type *EltTy = VecType->getElementType();
Ayman Musac5490e52017-05-15 11:30:54 +0000136
137 IRBuilder<> Builder(CI->getContext());
138 Instruction *InsertPt = CI;
139 BasicBlock *IfBlock = CI->getParent();
Ayman Musac5490e52017-05-15 11:30:54 +0000140
141 Builder.SetInsertPoint(InsertPt);
142 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
143
144 // Short-cut if the mask is all-true.
Craig Topperdfe460d2018-09-27 21:28:41 +0000145 if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
James Y Knight14359ef2019-02-01 20:44:24 +0000146 Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
Ayman Musac5490e52017-05-15 11:30:54 +0000147 CI->replaceAllUsesWith(NewI);
148 CI->eraseFromParent();
149 return;
150 }
151
152 // Adjust alignment for the scalar instruction.
Craig Topperbb50c382018-09-28 03:35:37 +0000153 AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
Craig Topper69f8c162019-03-09 02:08:41 +0000154 // Bitcast %addr from i8* to EltTy*
Ayman Musac5490e52017-05-15 11:30:54 +0000155 Type *NewPtrType =
Craig Topper69f8c162019-03-09 02:08:41 +0000156 EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
Ayman Musac5490e52017-05-15 11:30:54 +0000157 Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
158 unsigned VectorWidth = VecType->getNumElements();
159
Ayman Musac5490e52017-05-15 11:30:54 +0000160 // The result vector
Craig Topper7d234d62018-09-27 21:28:52 +0000161 Value *VResult = Src0;
Ayman Musac5490e52017-05-15 11:30:54 +0000162
Craig Topper8b4f0e12018-09-27 22:31:42 +0000163 if (isConstantIntVector(Mask)) {
Ayman Musac5490e52017-05-15 11:30:54 +0000164 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Craig Topperdfc0f282018-09-27 21:28:46 +0000165 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
Ayman Musac5490e52017-05-15 11:30:54 +0000166 continue;
Craig Topper69f8c162019-03-09 02:08:41 +0000167 Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
James Y Knight14359ef2019-02-01 20:44:24 +0000168 LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
Craig Topper69f8c162019-03-09 02:08:41 +0000169 VResult = Builder.CreateInsertElement(VResult, Load, Idx);
Ayman Musac5490e52017-05-15 11:30:54 +0000170 }
Craig Topper7d234d62018-09-27 21:28:52 +0000171 CI->replaceAllUsesWith(VResult);
Ayman Musac5490e52017-05-15 11:30:54 +0000172 CI->eraseFromParent();
173 return;
174 }
175
Craig Topperb70026c2019-07-31 22:58:15 +0000176 // If the mask is not v1i1, use scalar bit test operations. This generates
177 // better results on X86 at least.
178 Value *SclrMask;
179 if (VectorWidth != 1) {
180 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
181 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
182 }
183
Ayman Musac5490e52017-05-15 11:30:54 +0000184 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Ayman Musac5490e52017-05-15 11:30:54 +0000185 // Fill the "else" block, created in the previous iteration
186 //
187 // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
Craig Topperb70026c2019-07-31 22:58:15 +0000188 // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
189 // %cond = icmp ne i16 %mask_1, 0
Craig Topper04236812018-09-27 18:01:48 +0000190 // br i1 %mask_1, label %cond.load, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000191 //
Craig Topperb70026c2019-07-31 22:58:15 +0000192 Value *Predicate;
193 if (VectorWidth != 1) {
194 Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
195 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
196 Builder.getIntN(VectorWidth, 0));
197 } else {
198 Predicate = Builder.CreateExtractElement(Mask, Idx);
199 }
Ayman Musac5490e52017-05-15 11:30:54 +0000200
201 // Create "cond" block
202 //
203 // %EltAddr = getelementptr i32* %1, i32 0
204 // %Elt = load i32* %EltAddr
205 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
206 //
Craig Topper4104c002018-10-30 20:33:58 +0000207 BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
208 "cond.load");
Ayman Musac5490e52017-05-15 11:30:54 +0000209 Builder.SetInsertPoint(InsertPt);
210
Craig Topper69f8c162019-03-09 02:08:41 +0000211 Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
James Y Knight14359ef2019-02-01 20:44:24 +0000212 LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
Craig Topper69f8c162019-03-09 02:08:41 +0000213 Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
Ayman Musac5490e52017-05-15 11:30:54 +0000214
215 // Create "else" block, fill it in the next iteration
216 BasicBlock *NewIfBlock =
217 CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
218 Builder.SetInsertPoint(InsertPt);
219 Instruction *OldBr = IfBlock->getTerminator();
Craig Topper04236812018-09-27 18:01:48 +0000220 BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
Ayman Musac5490e52017-05-15 11:30:54 +0000221 OldBr->eraseFromParent();
Craig Topper4104c002018-10-30 20:33:58 +0000222 BasicBlock *PrevIfBlock = IfBlock;
Ayman Musac5490e52017-05-15 11:30:54 +0000223 IfBlock = NewIfBlock;
Craig Topper7d234d62018-09-27 21:28:52 +0000224
225 // Create the phi to join the new and previous value.
226 PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
227 Phi->addIncoming(NewVResult, CondBlock);
228 Phi->addIncoming(VResult, PrevIfBlock);
229 VResult = Phi;
Ayman Musac5490e52017-05-15 11:30:54 +0000230 }
231
Craig Topper7d234d62018-09-27 21:28:52 +0000232 CI->replaceAllUsesWith(VResult);
Ayman Musac5490e52017-05-15 11:30:54 +0000233 CI->eraseFromParent();
Craig Topperd84f6052019-03-08 23:03:43 +0000234
235 ModifiedDT = true;
Ayman Musac5490e52017-05-15 11:30:54 +0000236}
237
238// Translate a masked store intrinsic, like
239// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
240// <16 x i1> %mask)
241// to a chain of basic blocks, that stores element one-by-one if
242// the appropriate mask bit is set
243//
244// %1 = bitcast i8* %addr to i32*
245// %2 = extractelement <16 x i1> %mask, i32 0
Craig Topper49dad8b2018-09-27 21:28:39 +0000246// br i1 %2, label %cond.store, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000247//
248// cond.store: ; preds = %0
Craig Topper49dad8b2018-09-27 21:28:39 +0000249// %3 = extractelement <16 x i32> %val, i32 0
250// %4 = getelementptr i32* %1, i32 0
251// store i32 %3, i32* %4
Ayman Musac5490e52017-05-15 11:30:54 +0000252// br label %else
253//
254// else: ; preds = %0, %cond.store
Craig Topper49dad8b2018-09-27 21:28:39 +0000255// %5 = extractelement <16 x i1> %mask, i32 1
256// br i1 %5, label %cond.store1, label %else2
Ayman Musac5490e52017-05-15 11:30:54 +0000257//
258// cond.store1: ; preds = %else
Craig Topper49dad8b2018-09-27 21:28:39 +0000259// %6 = extractelement <16 x i32> %val, i32 1
260// %7 = getelementptr i32* %1, i32 1
261// store i32 %6, i32* %7
Ayman Musac5490e52017-05-15 11:30:54 +0000262// br label %else2
263// . . .
Craig Topperd84f6052019-03-08 23:03:43 +0000264static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
Ayman Musac5490e52017-05-15 11:30:54 +0000265 Value *Src = CI->getArgOperand(0);
266 Value *Ptr = CI->getArgOperand(1);
267 Value *Alignment = CI->getArgOperand(2);
268 Value *Mask = CI->getArgOperand(3);
269
270 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
Craig Topper10ec0212018-09-27 22:31:40 +0000271 VectorType *VecType = cast<VectorType>(Src->getType());
Ayman Musac5490e52017-05-15 11:30:54 +0000272
273 Type *EltTy = VecType->getElementType();
274
275 IRBuilder<> Builder(CI->getContext());
276 Instruction *InsertPt = CI;
277 BasicBlock *IfBlock = CI->getParent();
278 Builder.SetInsertPoint(InsertPt);
279 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
280
281 // Short-cut if the mask is all-true.
Craig Topperdfe460d2018-09-27 21:28:41 +0000282 if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
Ayman Musac5490e52017-05-15 11:30:54 +0000283 Builder.CreateAlignedStore(Src, Ptr, AlignVal);
284 CI->eraseFromParent();
285 return;
286 }
287
288 // Adjust alignment for the scalar instruction.
Craig Topperbb50c382018-09-28 03:35:37 +0000289 AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
Craig Topper69f8c162019-03-09 02:08:41 +0000290 // Bitcast %addr from i8* to EltTy*
Ayman Musac5490e52017-05-15 11:30:54 +0000291 Type *NewPtrType =
Craig Topper69f8c162019-03-09 02:08:41 +0000292 EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
Ayman Musac5490e52017-05-15 11:30:54 +0000293 Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
294 unsigned VectorWidth = VecType->getNumElements();
295
Craig Topper8b4f0e12018-09-27 22:31:42 +0000296 if (isConstantIntVector(Mask)) {
Ayman Musac5490e52017-05-15 11:30:54 +0000297 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Craig Topperdfc0f282018-09-27 21:28:46 +0000298 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
Ayman Musac5490e52017-05-15 11:30:54 +0000299 continue;
Craig Topper69f8c162019-03-09 02:08:41 +0000300 Value *OneElt = Builder.CreateExtractElement(Src, Idx);
301 Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
Ayman Musac5490e52017-05-15 11:30:54 +0000302 Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
303 }
304 CI->eraseFromParent();
305 return;
306 }
307
Craig Topperb70026c2019-07-31 22:58:15 +0000308 // If the mask is not v1i1, use scalar bit test operations. This generates
309 // better results on X86 at least.
310 Value *SclrMask;
311 if (VectorWidth != 1) {
312 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
313 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
314 }
315
Ayman Musac5490e52017-05-15 11:30:54 +0000316 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Ayman Musac5490e52017-05-15 11:30:54 +0000317 // Fill the "else" block, created in the previous iteration
318 //
Craig Topperb70026c2019-07-31 22:58:15 +0000319 // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
320 // %cond = icmp ne i16 %mask_1, 0
Craig Topper04236812018-09-27 18:01:48 +0000321 // br i1 %mask_1, label %cond.store, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000322 //
Craig Topperb70026c2019-07-31 22:58:15 +0000323 Value *Predicate;
324 if (VectorWidth != 1) {
325 Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
326 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
327 Builder.getIntN(VectorWidth, 0));
328 } else {
329 Predicate = Builder.CreateExtractElement(Mask, Idx);
330 }
Ayman Musac5490e52017-05-15 11:30:54 +0000331
332 // Create "cond" block
333 //
334 // %OneElt = extractelement <16 x i32> %Src, i32 Idx
335 // %EltAddr = getelementptr i32* %1, i32 0
336 // %store i32 %OneElt, i32* %EltAddr
337 //
338 BasicBlock *CondBlock =
339 IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
340 Builder.SetInsertPoint(InsertPt);
341
Craig Topper69f8c162019-03-09 02:08:41 +0000342 Value *OneElt = Builder.CreateExtractElement(Src, Idx);
343 Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
Ayman Musac5490e52017-05-15 11:30:54 +0000344 Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
345
346 // Create "else" block, fill it in the next iteration
347 BasicBlock *NewIfBlock =
348 CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
349 Builder.SetInsertPoint(InsertPt);
350 Instruction *OldBr = IfBlock->getTerminator();
Craig Topper04236812018-09-27 18:01:48 +0000351 BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
Ayman Musac5490e52017-05-15 11:30:54 +0000352 OldBr->eraseFromParent();
353 IfBlock = NewIfBlock;
354 }
355 CI->eraseFromParent();
Craig Topperd84f6052019-03-08 23:03:43 +0000356
357 ModifiedDT = true;
Ayman Musac5490e52017-05-15 11:30:54 +0000358}
359
360// Translate a masked gather intrinsic like
361// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
362// <16 x i1> %Mask, <16 x i32> %Src)
363// to a chain of basic blocks, with loading element one-by-one if
364// the appropriate mask bit is set
365//
Craig Topper49dad8b2018-09-27 21:28:39 +0000366// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
367// %Mask0 = extractelement <16 x i1> %Mask, i32 0
368// br i1 %Mask0, label %cond.load, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000369//
370// cond.load:
Craig Topper49dad8b2018-09-27 21:28:39 +0000371// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
372// %Load0 = load i32, i32* %Ptr0, align 4
373// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
Ayman Musac5490e52017-05-15 11:30:54 +0000374// br label %else
375//
376// else:
Craig Topper49dad8b2018-09-27 21:28:39 +0000377// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
378// %Mask1 = extractelement <16 x i1> %Mask, i32 1
379// br i1 %Mask1, label %cond.load1, label %else2
Ayman Musac5490e52017-05-15 11:30:54 +0000380//
381// cond.load1:
Craig Topper49dad8b2018-09-27 21:28:39 +0000382// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
383// %Load1 = load i32, i32* %Ptr1, align 4
384// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
Ayman Musac5490e52017-05-15 11:30:54 +0000385// br label %else2
386// . . .
Craig Topper49dad8b2018-09-27 21:28:39 +0000387// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
Ayman Musac5490e52017-05-15 11:30:54 +0000388// ret <16 x i32> %Result
Craig Topperd84f6052019-03-08 23:03:43 +0000389static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
Ayman Musac5490e52017-05-15 11:30:54 +0000390 Value *Ptrs = CI->getArgOperand(0);
391 Value *Alignment = CI->getArgOperand(1);
392 Value *Mask = CI->getArgOperand(2);
393 Value *Src0 = CI->getArgOperand(3);
394
Craig Topper10ec0212018-09-27 22:31:40 +0000395 VectorType *VecType = cast<VectorType>(CI->getType());
James Y Knight14359ef2019-02-01 20:44:24 +0000396 Type *EltTy = VecType->getElementType();
Ayman Musac5490e52017-05-15 11:30:54 +0000397
398 IRBuilder<> Builder(CI->getContext());
399 Instruction *InsertPt = CI;
400 BasicBlock *IfBlock = CI->getParent();
Ayman Musac5490e52017-05-15 11:30:54 +0000401 Builder.SetInsertPoint(InsertPt);
402 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
403
404 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
405
Ayman Musac5490e52017-05-15 11:30:54 +0000406 // The result vector
Craig Topper6911bfe2018-09-27 21:28:59 +0000407 Value *VResult = Src0;
Ayman Musac5490e52017-05-15 11:30:54 +0000408 unsigned VectorWidth = VecType->getNumElements();
409
410 // Shorten the way if the mask is a vector of constants.
Craig Topper8b4f0e12018-09-27 22:31:42 +0000411 if (isConstantIntVector(Mask)) {
Ayman Musac5490e52017-05-15 11:30:54 +0000412 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Craig Topperdfc0f282018-09-27 21:28:46 +0000413 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
Ayman Musac5490e52017-05-15 11:30:54 +0000414 continue;
Craig Topper69f8c162019-03-09 02:08:41 +0000415 Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
Ayman Musac5490e52017-05-15 11:30:54 +0000416 LoadInst *Load =
James Y Knight14359ef2019-02-01 20:44:24 +0000417 Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
Craig Topper69f8c162019-03-09 02:08:41 +0000418 VResult =
419 Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
Ayman Musac5490e52017-05-15 11:30:54 +0000420 }
Craig Topper6911bfe2018-09-27 21:28:59 +0000421 CI->replaceAllUsesWith(VResult);
Ayman Musac5490e52017-05-15 11:30:54 +0000422 CI->eraseFromParent();
423 return;
424 }
425
Craig Topperb70026c2019-07-31 22:58:15 +0000426 // If the mask is not v1i1, use scalar bit test operations. This generates
427 // better results on X86 at least.
428 Value *SclrMask;
429 if (VectorWidth != 1) {
430 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
431 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
432 }
433
Ayman Musac5490e52017-05-15 11:30:54 +0000434 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Ayman Musac5490e52017-05-15 11:30:54 +0000435 // Fill the "else" block, created in the previous iteration
436 //
Craig Topperb70026c2019-07-31 22:58:15 +0000437 // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
438 // %cond = icmp ne i16 %mask_1, 0
Craig Topper04236812018-09-27 18:01:48 +0000439 // br i1 %Mask1, label %cond.load, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000440 //
Ayman Musac5490e52017-05-15 11:30:54 +0000441
Craig Topperb70026c2019-07-31 22:58:15 +0000442 Value *Predicate;
443 if (VectorWidth != 1) {
444 Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
445 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
446 Builder.getIntN(VectorWidth, 0));
447 } else {
448 Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
449 }
Ayman Musac5490e52017-05-15 11:30:54 +0000450
451 // Create "cond" block
452 //
453 // %EltAddr = getelementptr i32* %1, i32 0
454 // %Elt = load i32* %EltAddr
455 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
456 //
Craig Topper4104c002018-10-30 20:33:58 +0000457 BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
Ayman Musac5490e52017-05-15 11:30:54 +0000458 Builder.SetInsertPoint(InsertPt);
459
Craig Topper69f8c162019-03-09 02:08:41 +0000460 Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
Ayman Musac5490e52017-05-15 11:30:54 +0000461 LoadInst *Load =
James Y Knight14359ef2019-02-01 20:44:24 +0000462 Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
Craig Topper69f8c162019-03-09 02:08:41 +0000463 Value *NewVResult =
464 Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
Ayman Musac5490e52017-05-15 11:30:54 +0000465
466 // Create "else" block, fill it in the next iteration
467 BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
468 Builder.SetInsertPoint(InsertPt);
469 Instruction *OldBr = IfBlock->getTerminator();
Craig Topper04236812018-09-27 18:01:48 +0000470 BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
Ayman Musac5490e52017-05-15 11:30:54 +0000471 OldBr->eraseFromParent();
Craig Topper4104c002018-10-30 20:33:58 +0000472 BasicBlock *PrevIfBlock = IfBlock;
Ayman Musac5490e52017-05-15 11:30:54 +0000473 IfBlock = NewIfBlock;
Craig Topper6911bfe2018-09-27 21:28:59 +0000474
475 PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
476 Phi->addIncoming(NewVResult, CondBlock);
477 Phi->addIncoming(VResult, PrevIfBlock);
478 VResult = Phi;
Ayman Musac5490e52017-05-15 11:30:54 +0000479 }
480
Craig Topper6911bfe2018-09-27 21:28:59 +0000481 CI->replaceAllUsesWith(VResult);
Ayman Musac5490e52017-05-15 11:30:54 +0000482 CI->eraseFromParent();
Craig Topperd84f6052019-03-08 23:03:43 +0000483
484 ModifiedDT = true;
Ayman Musac5490e52017-05-15 11:30:54 +0000485}
486
487// Translate a masked scatter intrinsic, like
488// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
489// <16 x i1> %Mask)
490// to a chain of basic blocks, that stores element one-by-one if
491// the appropriate mask bit is set.
492//
Craig Topper49dad8b2018-09-27 21:28:39 +0000493// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
494// %Mask0 = extractelement <16 x i1> %Mask, i32 0
495// br i1 %Mask0, label %cond.store, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000496//
497// cond.store:
Craig Topper49dad8b2018-09-27 21:28:39 +0000498// %Elt0 = extractelement <16 x i32> %Src, i32 0
499// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
500// store i32 %Elt0, i32* %Ptr0, align 4
Ayman Musac5490e52017-05-15 11:30:54 +0000501// br label %else
502//
503// else:
Craig Topper49dad8b2018-09-27 21:28:39 +0000504// %Mask1 = extractelement <16 x i1> %Mask, i32 1
505// br i1 %Mask1, label %cond.store1, label %else2
Ayman Musac5490e52017-05-15 11:30:54 +0000506//
507// cond.store1:
Craig Topper49dad8b2018-09-27 21:28:39 +0000508// %Elt1 = extractelement <16 x i32> %Src, i32 1
509// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
510// store i32 %Elt1, i32* %Ptr1, align 4
Ayman Musac5490e52017-05-15 11:30:54 +0000511// br label %else2
512// . . .
Craig Topperd84f6052019-03-08 23:03:43 +0000513static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
Ayman Musac5490e52017-05-15 11:30:54 +0000514 Value *Src = CI->getArgOperand(0);
515 Value *Ptrs = CI->getArgOperand(1);
516 Value *Alignment = CI->getArgOperand(2);
517 Value *Mask = CI->getArgOperand(3);
518
519 assert(isa<VectorType>(Src->getType()) &&
520 "Unexpected data type in masked scatter intrinsic");
521 assert(isa<VectorType>(Ptrs->getType()) &&
522 isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
523 "Vector of pointers is expected in masked scatter intrinsic");
524
525 IRBuilder<> Builder(CI->getContext());
526 Instruction *InsertPt = CI;
527 BasicBlock *IfBlock = CI->getParent();
528 Builder.SetInsertPoint(InsertPt);
529 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
530
531 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
532 unsigned VectorWidth = Src->getType()->getVectorNumElements();
533
534 // Shorten the way if the mask is a vector of constants.
Craig Topper8b4f0e12018-09-27 22:31:42 +0000535 if (isConstantIntVector(Mask)) {
Ayman Musac5490e52017-05-15 11:30:54 +0000536 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Craig Topper5f79d742019-06-02 22:52:34 +0000537 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
Ayman Musac5490e52017-05-15 11:30:54 +0000538 continue;
Craig Topper69f8c162019-03-09 02:08:41 +0000539 Value *OneElt =
540 Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
541 Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
Ayman Musac5490e52017-05-15 11:30:54 +0000542 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
543 }
544 CI->eraseFromParent();
545 return;
546 }
Craig Topperdfe460d2018-09-27 21:28:41 +0000547
Craig Topperb70026c2019-07-31 22:58:15 +0000548 // If the mask is not v1i1, use scalar bit test operations. This generates
549 // better results on X86 at least.
550 Value *SclrMask;
551 if (VectorWidth != 1) {
552 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
553 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
554 }
555
Ayman Musac5490e52017-05-15 11:30:54 +0000556 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
557 // Fill the "else" block, created in the previous iteration
558 //
Craig Topperb70026c2019-07-31 22:58:15 +0000559 // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
560 // %cond = icmp ne i16 %mask_1, 0
Craig Topper04236812018-09-27 18:01:48 +0000561 // br i1 %Mask1, label %cond.store, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000562 //
Craig Topperb70026c2019-07-31 22:58:15 +0000563 Value *Predicate;
564 if (VectorWidth != 1) {
565 Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
566 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
567 Builder.getIntN(VectorWidth, 0));
568 } else {
569 Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
570 }
Ayman Musac5490e52017-05-15 11:30:54 +0000571
572 // Create "cond" block
573 //
Craig Topper49dad8b2018-09-27 21:28:39 +0000574 // %Elt1 = extractelement <16 x i32> %Src, i32 1
575 // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
576 // %store i32 %Elt1, i32* %Ptr1
Ayman Musac5490e52017-05-15 11:30:54 +0000577 //
578 BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
579 Builder.SetInsertPoint(InsertPt);
580
Craig Topper69f8c162019-03-09 02:08:41 +0000581 Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
582 Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
Ayman Musac5490e52017-05-15 11:30:54 +0000583 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
584
585 // Create "else" block, fill it in the next iteration
586 BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
587 Builder.SetInsertPoint(InsertPt);
588 Instruction *OldBr = IfBlock->getTerminator();
Craig Topper04236812018-09-27 18:01:48 +0000589 BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
Ayman Musac5490e52017-05-15 11:30:54 +0000590 OldBr->eraseFromParent();
591 IfBlock = NewIfBlock;
592 }
593 CI->eraseFromParent();
Craig Topperd84f6052019-03-08 23:03:43 +0000594
595 ModifiedDT = true;
Ayman Musac5490e52017-05-15 11:30:54 +0000596}
597
Craig Topper9f0b17a2019-03-21 17:38:52 +0000598static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
599 Value *Ptr = CI->getArgOperand(0);
600 Value *Mask = CI->getArgOperand(1);
601 Value *PassThru = CI->getArgOperand(2);
602
603 VectorType *VecType = cast<VectorType>(CI->getType());
604
605 Type *EltTy = VecType->getElementType();
606
607 IRBuilder<> Builder(CI->getContext());
608 Instruction *InsertPt = CI;
609 BasicBlock *IfBlock = CI->getParent();
610
611 Builder.SetInsertPoint(InsertPt);
612 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
613
614 unsigned VectorWidth = VecType->getNumElements();
615
616 // The result vector
617 Value *VResult = PassThru;
618
Craig Topperde9b1d72019-08-02 20:04:34 +0000619 // Shorten the way if the mask is a vector of constants.
620 if (isConstantIntVector(Mask)) {
621 unsigned MemIndex = 0;
622 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
623 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
624 continue;
625 Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
626 LoadInst *Load =
627 Builder.CreateAlignedLoad(EltTy, NewPtr, 1, "Load" + Twine(Idx));
628 VResult =
629 Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
630 ++MemIndex;
631 }
632 CI->replaceAllUsesWith(VResult);
633 CI->eraseFromParent();
634 return;
635 }
636
Craig Topperb1cfcd12019-08-02 23:43:53 +0000637 // If the mask is not v1i1, use scalar bit test operations. This generates
638 // better results on X86 at least.
639 Value *SclrMask;
640 if (VectorWidth != 1) {
641 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
642 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
643 }
644
Craig Topper9f0b17a2019-03-21 17:38:52 +0000645 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
646 // Fill the "else" block, created in the previous iteration
647 //
648 // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
649 // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
650 // br i1 %mask_1, label %cond.load, label %else
651 //
652
Craig Topperb1cfcd12019-08-02 23:43:53 +0000653 Value *Predicate;
654 if (VectorWidth != 1) {
655 Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
656 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
657 Builder.getIntN(VectorWidth, 0));
658 } else {
659 Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
660 }
Craig Topper9f0b17a2019-03-21 17:38:52 +0000661
662 // Create "cond" block
663 //
664 // %EltAddr = getelementptr i32* %1, i32 0
665 // %Elt = load i32* %EltAddr
666 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
667 //
668 BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
669 "cond.load");
670 Builder.SetInsertPoint(InsertPt);
671
672 LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, 1);
673 Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
674
675 // Move the pointer if there are more blocks to come.
676 Value *NewPtr;
677 if ((Idx + 1) != VectorWidth)
678 NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
679
680 // Create "else" block, fill it in the next iteration
681 BasicBlock *NewIfBlock =
682 CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
683 Builder.SetInsertPoint(InsertPt);
684 Instruction *OldBr = IfBlock->getTerminator();
685 BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
686 OldBr->eraseFromParent();
687 BasicBlock *PrevIfBlock = IfBlock;
688 IfBlock = NewIfBlock;
689
690 // Create the phi to join the new and previous value.
691 PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
692 ResultPhi->addIncoming(NewVResult, CondBlock);
693 ResultPhi->addIncoming(VResult, PrevIfBlock);
694 VResult = ResultPhi;
695
696 // Add a PHI for the pointer if this isn't the last iteration.
697 if ((Idx + 1) != VectorWidth) {
698 PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
699 PtrPhi->addIncoming(NewPtr, CondBlock);
700 PtrPhi->addIncoming(Ptr, PrevIfBlock);
701 Ptr = PtrPhi;
702 }
703 }
704
705 CI->replaceAllUsesWith(VResult);
706 CI->eraseFromParent();
707
708 ModifiedDT = true;
709}
710
711static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
712 Value *Src = CI->getArgOperand(0);
713 Value *Ptr = CI->getArgOperand(1);
714 Value *Mask = CI->getArgOperand(2);
715
716 VectorType *VecType = cast<VectorType>(Src->getType());
717
718 IRBuilder<> Builder(CI->getContext());
719 Instruction *InsertPt = CI;
720 BasicBlock *IfBlock = CI->getParent();
721
722 Builder.SetInsertPoint(InsertPt);
723 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
724
725 Type *EltTy = VecType->getVectorElementType();
726
727 unsigned VectorWidth = VecType->getNumElements();
728
Craig Topperde9b1d72019-08-02 20:04:34 +0000729 // Shorten the way if the mask is a vector of constants.
730 if (isConstantIntVector(Mask)) {
731 unsigned MemIndex = 0;
732 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
733 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
734 continue;
735 Value *OneElt =
736 Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
737 Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
738 Builder.CreateAlignedStore(OneElt, NewPtr, 1);
739 ++MemIndex;
740 }
741 CI->eraseFromParent();
742 return;
743 }
744
Craig Topperb1cfcd12019-08-02 23:43:53 +0000745 // If the mask is not v1i1, use scalar bit test operations. This generates
746 // better results on X86 at least.
747 Value *SclrMask;
748 if (VectorWidth != 1) {
749 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
750 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
751 }
752
Craig Topper9f0b17a2019-03-21 17:38:52 +0000753 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
754 // Fill the "else" block, created in the previous iteration
755 //
756 // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
757 // br i1 %mask_1, label %cond.store, label %else
758 //
Craig Topperb1cfcd12019-08-02 23:43:53 +0000759 Value *Predicate;
760 if (VectorWidth != 1) {
761 Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
762 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
763 Builder.getIntN(VectorWidth, 0));
764 } else {
765 Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
766 }
Craig Topper9f0b17a2019-03-21 17:38:52 +0000767
768 // Create "cond" block
769 //
770 // %OneElt = extractelement <16 x i32> %Src, i32 Idx
771 // %EltAddr = getelementptr i32* %1, i32 0
772 // %store i32 %OneElt, i32* %EltAddr
773 //
774 BasicBlock *CondBlock =
775 IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
776 Builder.SetInsertPoint(InsertPt);
777
778 Value *OneElt = Builder.CreateExtractElement(Src, Idx);
779 Builder.CreateAlignedStore(OneElt, Ptr, 1);
780
781 // Move the pointer if there are more blocks to come.
782 Value *NewPtr;
783 if ((Idx + 1) != VectorWidth)
784 NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
785
786 // Create "else" block, fill it in the next iteration
787 BasicBlock *NewIfBlock =
788 CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
789 Builder.SetInsertPoint(InsertPt);
790 Instruction *OldBr = IfBlock->getTerminator();
791 BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
792 OldBr->eraseFromParent();
793 BasicBlock *PrevIfBlock = IfBlock;
794 IfBlock = NewIfBlock;
795
796 // Add a PHI for the pointer if this isn't the last iteration.
797 if ((Idx + 1) != VectorWidth) {
798 PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
799 PtrPhi->addIncoming(NewPtr, CondBlock);
800 PtrPhi->addIncoming(Ptr, PrevIfBlock);
801 Ptr = PtrPhi;
802 }
803 }
804 CI->eraseFromParent();
805
806 ModifiedDT = true;
807}
808
Ayman Musac5490e52017-05-15 11:30:54 +0000809bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
Ayman Musac5490e52017-05-15 11:30:54 +0000810 bool EverMadeChange = false;
811
812 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
813
814 bool MadeChange = true;
815 while (MadeChange) {
816 MadeChange = false;
817 for (Function::iterator I = F.begin(); I != F.end();) {
818 BasicBlock *BB = &*I++;
819 bool ModifiedDTOnIteration = false;
820 MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
821
822 // Restart BB iteration if the dominator tree of the Function was changed
823 if (ModifiedDTOnIteration)
824 break;
825 }
826
827 EverMadeChange |= MadeChange;
828 }
829
830 return EverMadeChange;
831}
832
833bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
834 bool MadeChange = false;
835
836 BasicBlock::iterator CurInstIterator = BB.begin();
837 while (CurInstIterator != BB.end()) {
838 if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
839 MadeChange |= optimizeCallInst(CI, ModifiedDT);
840 if (ModifiedDT)
841 return true;
842 }
843
844 return MadeChange;
845}
846
847bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
848 bool &ModifiedDT) {
Ayman Musac5490e52017-05-15 11:30:54 +0000849 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
850 if (II) {
851 switch (II->getIntrinsicID()) {
852 default:
853 break;
Sam Parker527a35e2019-10-14 10:00:21 +0000854 case Intrinsic::masked_load: {
Ayman Musac5490e52017-05-15 11:30:54 +0000855 // Scalarize unsupported vector masked load
Sam Parker527a35e2019-10-14 10:00:21 +0000856 unsigned Alignment =
857 cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
858 if (TTI->isLegalMaskedLoad(CI->getType(), MaybeAlign(Alignment)))
Craig Topper8de7bc02019-03-21 05:54:37 +0000859 return false;
860 scalarizeMaskedLoad(CI, ModifiedDT);
861 return true;
Sam Parker527a35e2019-10-14 10:00:21 +0000862 }
863 case Intrinsic::masked_store: {
864 unsigned Alignment =
865 cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
866 if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(),
867 MaybeAlign(Alignment)))
Craig Topper8de7bc02019-03-21 05:54:37 +0000868 return false;
869 scalarizeMaskedStore(CI, ModifiedDT);
870 return true;
Sam Parker527a35e2019-10-14 10:00:21 +0000871 }
Eugene Zelenkofa57bd02017-09-27 23:26:01 +0000872 case Intrinsic::masked_gather:
Craig Topper8de7bc02019-03-21 05:54:37 +0000873 if (TTI->isLegalMaskedGather(CI->getType()))
874 return false;
875 scalarizeMaskedGather(CI, ModifiedDT);
876 return true;
Eugene Zelenkofa57bd02017-09-27 23:26:01 +0000877 case Intrinsic::masked_scatter:
Craig Topper8de7bc02019-03-21 05:54:37 +0000878 if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType()))
879 return false;
880 scalarizeMaskedScatter(CI, ModifiedDT);
881 return true;
Craig Topper9f0b17a2019-03-21 17:38:52 +0000882 case Intrinsic::masked_expandload:
883 if (TTI->isLegalMaskedExpandLoad(CI->getType()))
884 return false;
885 scalarizeMaskedExpandLoad(CI, ModifiedDT);
886 return true;
887 case Intrinsic::masked_compressstore:
888 if (TTI->isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
889 return false;
890 scalarizeMaskedCompressStore(CI, ModifiedDT);
891 return true;
Ayman Musac5490e52017-05-15 11:30:54 +0000892 }
Ayman Musac5490e52017-05-15 11:30:54 +0000893 }
894
895 return false;
896}