blob: 64b358b59803d3b68cee876375b0f897f8306561 [file] [log] [blame]
//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
//                                    intrinsics
Ayman Musac5490e52017-05-15 11:30:54 +00003//
Chandler Carruth2946cd72019-01-19 08:50:56 +00004// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Ayman Musac5490e52017-05-15 11:30:54 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This pass replaces masked memory intrinsics - when unsupported by the target
11// - with a chain of basic blocks, that deal with the elements one-by-one if the
12// appropriate mask bit is set.
13//
14//===----------------------------------------------------------------------===//
15
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000016#include "llvm/ADT/Twine.h"
Ayman Musac5490e52017-05-15 11:30:54 +000017#include "llvm/Analysis/TargetTransformInfo.h"
David Blaikieb3bde2e2017-11-17 01:07:10 +000018#include "llvm/CodeGen/TargetSubtargetInfo.h"
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000019#include "llvm/IR/BasicBlock.h"
20#include "llvm/IR/Constant.h"
21#include "llvm/IR/Constants.h"
22#include "llvm/IR/DerivedTypes.h"
23#include "llvm/IR/Function.h"
Ayman Musac5490e52017-05-15 11:30:54 +000024#include "llvm/IR/IRBuilder.h"
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000025#include "llvm/IR/InstrTypes.h"
26#include "llvm/IR/Instruction.h"
27#include "llvm/IR/Instructions.h"
Reid Kleckner0e8c4bb2017-09-07 23:27:44 +000028#include "llvm/IR/IntrinsicInst.h"
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000029#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/Type.h"
31#include "llvm/IR/Value.h"
32#include "llvm/Pass.h"
33#include "llvm/Support/Casting.h"
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000034#include <algorithm>
35#include <cassert>
Ayman Musac5490e52017-05-15 11:30:54 +000036
37using namespace llvm;
38
39#define DEBUG_TYPE "scalarize-masked-mem-intrin"
40
41namespace {
42
43class ScalarizeMaskedMemIntrin : public FunctionPass {
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000044 const TargetTransformInfo *TTI = nullptr;
Ayman Musac5490e52017-05-15 11:30:54 +000045
46public:
47 static char ID; // Pass identification, replacement for typeid
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000048
49 explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID) {
Ayman Musac5490e52017-05-15 11:30:54 +000050 initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry());
51 }
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000052
Ayman Musac5490e52017-05-15 11:30:54 +000053 bool runOnFunction(Function &F) override;
54
55 StringRef getPassName() const override {
56 return "Scalarize Masked Memory Intrinsics";
57 }
58
59 void getAnalysisUsage(AnalysisUsage &AU) const override {
60 AU.addRequired<TargetTransformInfoWrapperPass>();
61 }
62
63private:
64 bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
65 bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
66};
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000067
68} // end anonymous namespace
Ayman Musac5490e52017-05-15 11:30:54 +000069
70char ScalarizeMaskedMemIntrin::ID = 0;
Eugene Zelenkofa57bd02017-09-27 23:26:01 +000071
Matthias Braun1527baa2017-05-25 21:26:32 +000072INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE,
73 "Scalarize unsupported masked memory intrinsics", false, false)
Ayman Musac5490e52017-05-15 11:30:54 +000074
75FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
76 return new ScalarizeMaskedMemIntrin();
77}
78
Craig Topper8b4f0e12018-09-27 22:31:42 +000079static bool isConstantIntVector(Value *Mask) {
80 Constant *C = dyn_cast<Constant>(Mask);
81 if (!C)
82 return false;
83
84 unsigned NumElts = Mask->getType()->getVectorNumElements();
85 for (unsigned i = 0; i != NumElts; ++i) {
86 Constant *CElt = C->getAggregateElement(i);
87 if (!CElt || !isa<ConstantInt>(CElt))
88 return false;
89 }
90
91 return true;
92}
93
Ayman Musac5490e52017-05-15 11:30:54 +000094// Translate a masked load intrinsic like
95// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
96// <16 x i1> %mask, <16 x i32> %passthru)
97// to a chain of basic blocks, with loading element one-by-one if
98// the appropriate mask bit is set
99//
100// %1 = bitcast i8* %addr to i32*
101// %2 = extractelement <16 x i1> %mask, i32 0
Craig Topper49dad8b2018-09-27 21:28:39 +0000102// br i1 %2, label %cond.load, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000103//
104// cond.load: ; preds = %0
Craig Topper49dad8b2018-09-27 21:28:39 +0000105// %3 = getelementptr i32* %1, i32 0
106// %4 = load i32* %3
Craig Topper7d234d62018-09-27 21:28:52 +0000107// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
Ayman Musac5490e52017-05-15 11:30:54 +0000108// br label %else
109//
110// else: ; preds = %0, %cond.load
Craig Topper49dad8b2018-09-27 21:28:39 +0000111// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
112// %6 = extractelement <16 x i1> %mask, i32 1
113// br i1 %6, label %cond.load1, label %else2
Ayman Musac5490e52017-05-15 11:30:54 +0000114//
115// cond.load1: ; preds = %else
Craig Topper49dad8b2018-09-27 21:28:39 +0000116// %7 = getelementptr i32* %1, i32 1
117// %8 = load i32* %7
118// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
Ayman Musac5490e52017-05-15 11:30:54 +0000119// br label %else2
120//
121// else2: ; preds = %else, %cond.load1
Craig Topper49dad8b2018-09-27 21:28:39 +0000122// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
123// %10 = extractelement <16 x i1> %mask, i32 2
124// br i1 %10, label %cond.load4, label %else5
Ayman Musac5490e52017-05-15 11:30:54 +0000125//
126static void scalarizeMaskedLoad(CallInst *CI) {
127 Value *Ptr = CI->getArgOperand(0);
128 Value *Alignment = CI->getArgOperand(1);
129 Value *Mask = CI->getArgOperand(2);
130 Value *Src0 = CI->getArgOperand(3);
131
132 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
Craig Topper10ec0212018-09-27 22:31:40 +0000133 VectorType *VecType = cast<VectorType>(CI->getType());
Ayman Musac5490e52017-05-15 11:30:54 +0000134
Craig Topper10ec0212018-09-27 22:31:40 +0000135 Type *EltTy = VecType->getElementType();
Ayman Musac5490e52017-05-15 11:30:54 +0000136
137 IRBuilder<> Builder(CI->getContext());
138 Instruction *InsertPt = CI;
139 BasicBlock *IfBlock = CI->getParent();
Ayman Musac5490e52017-05-15 11:30:54 +0000140
141 Builder.SetInsertPoint(InsertPt);
142 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
143
144 // Short-cut if the mask is all-true.
Craig Topperdfe460d2018-09-27 21:28:41 +0000145 if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
Ayman Musac5490e52017-05-15 11:30:54 +0000146 Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
147 CI->replaceAllUsesWith(NewI);
148 CI->eraseFromParent();
149 return;
150 }
151
152 // Adjust alignment for the scalar instruction.
Craig Topperbb50c382018-09-28 03:35:37 +0000153 AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
Ayman Musac5490e52017-05-15 11:30:54 +0000154 // Bitcast %addr fron i8* to EltTy*
155 Type *NewPtrType =
156 EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
157 Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
158 unsigned VectorWidth = VecType->getNumElements();
159
Ayman Musac5490e52017-05-15 11:30:54 +0000160 // The result vector
Craig Topper7d234d62018-09-27 21:28:52 +0000161 Value *VResult = Src0;
Ayman Musac5490e52017-05-15 11:30:54 +0000162
Craig Topper8b4f0e12018-09-27 22:31:42 +0000163 if (isConstantIntVector(Mask)) {
Ayman Musac5490e52017-05-15 11:30:54 +0000164 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Craig Topperdfc0f282018-09-27 21:28:46 +0000165 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
Ayman Musac5490e52017-05-15 11:30:54 +0000166 continue;
167 Value *Gep =
168 Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
169 LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
170 VResult =
171 Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
172 }
Craig Topper7d234d62018-09-27 21:28:52 +0000173 CI->replaceAllUsesWith(VResult);
Ayman Musac5490e52017-05-15 11:30:54 +0000174 CI->eraseFromParent();
175 return;
176 }
177
Ayman Musac5490e52017-05-15 11:30:54 +0000178 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Ayman Musac5490e52017-05-15 11:30:54 +0000179 // Fill the "else" block, created in the previous iteration
180 //
181 // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
182 // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
Craig Topper04236812018-09-27 18:01:48 +0000183 // br i1 %mask_1, label %cond.load, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000184 //
Ayman Musac5490e52017-05-15 11:30:54 +0000185
186 Value *Predicate =
187 Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
Ayman Musac5490e52017-05-15 11:30:54 +0000188
189 // Create "cond" block
190 //
191 // %EltAddr = getelementptr i32* %1, i32 0
192 // %Elt = load i32* %EltAddr
193 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
194 //
Craig Topper4104c002018-10-30 20:33:58 +0000195 BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
196 "cond.load");
Ayman Musac5490e52017-05-15 11:30:54 +0000197 Builder.SetInsertPoint(InsertPt);
198
199 Value *Gep =
200 Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
201 LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
Craig Topper7d234d62018-09-27 21:28:52 +0000202 Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
203 Builder.getInt32(Idx));
Ayman Musac5490e52017-05-15 11:30:54 +0000204
205 // Create "else" block, fill it in the next iteration
206 BasicBlock *NewIfBlock =
207 CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
208 Builder.SetInsertPoint(InsertPt);
209 Instruction *OldBr = IfBlock->getTerminator();
Craig Topper04236812018-09-27 18:01:48 +0000210 BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
Ayman Musac5490e52017-05-15 11:30:54 +0000211 OldBr->eraseFromParent();
Craig Topper4104c002018-10-30 20:33:58 +0000212 BasicBlock *PrevIfBlock = IfBlock;
Ayman Musac5490e52017-05-15 11:30:54 +0000213 IfBlock = NewIfBlock;
Craig Topper7d234d62018-09-27 21:28:52 +0000214
215 // Create the phi to join the new and previous value.
216 PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
217 Phi->addIncoming(NewVResult, CondBlock);
218 Phi->addIncoming(VResult, PrevIfBlock);
219 VResult = Phi;
Ayman Musac5490e52017-05-15 11:30:54 +0000220 }
221
Craig Topper7d234d62018-09-27 21:28:52 +0000222 CI->replaceAllUsesWith(VResult);
Ayman Musac5490e52017-05-15 11:30:54 +0000223 CI->eraseFromParent();
224}
225
226// Translate a masked store intrinsic, like
227// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
228// <16 x i1> %mask)
229// to a chain of basic blocks, that stores element one-by-one if
230// the appropriate mask bit is set
231//
232// %1 = bitcast i8* %addr to i32*
233// %2 = extractelement <16 x i1> %mask, i32 0
Craig Topper49dad8b2018-09-27 21:28:39 +0000234// br i1 %2, label %cond.store, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000235//
236// cond.store: ; preds = %0
Craig Topper49dad8b2018-09-27 21:28:39 +0000237// %3 = extractelement <16 x i32> %val, i32 0
238// %4 = getelementptr i32* %1, i32 0
239// store i32 %3, i32* %4
Ayman Musac5490e52017-05-15 11:30:54 +0000240// br label %else
241//
242// else: ; preds = %0, %cond.store
Craig Topper49dad8b2018-09-27 21:28:39 +0000243// %5 = extractelement <16 x i1> %mask, i32 1
244// br i1 %5, label %cond.store1, label %else2
Ayman Musac5490e52017-05-15 11:30:54 +0000245//
246// cond.store1: ; preds = %else
Craig Topper49dad8b2018-09-27 21:28:39 +0000247// %6 = extractelement <16 x i32> %val, i32 1
248// %7 = getelementptr i32* %1, i32 1
249// store i32 %6, i32* %7
Ayman Musac5490e52017-05-15 11:30:54 +0000250// br label %else2
251// . . .
252static void scalarizeMaskedStore(CallInst *CI) {
253 Value *Src = CI->getArgOperand(0);
254 Value *Ptr = CI->getArgOperand(1);
255 Value *Alignment = CI->getArgOperand(2);
256 Value *Mask = CI->getArgOperand(3);
257
258 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
Craig Topper10ec0212018-09-27 22:31:40 +0000259 VectorType *VecType = cast<VectorType>(Src->getType());
Ayman Musac5490e52017-05-15 11:30:54 +0000260
261 Type *EltTy = VecType->getElementType();
262
263 IRBuilder<> Builder(CI->getContext());
264 Instruction *InsertPt = CI;
265 BasicBlock *IfBlock = CI->getParent();
266 Builder.SetInsertPoint(InsertPt);
267 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
268
269 // Short-cut if the mask is all-true.
Craig Topperdfe460d2018-09-27 21:28:41 +0000270 if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
Ayman Musac5490e52017-05-15 11:30:54 +0000271 Builder.CreateAlignedStore(Src, Ptr, AlignVal);
272 CI->eraseFromParent();
273 return;
274 }
275
276 // Adjust alignment for the scalar instruction.
Craig Topperbb50c382018-09-28 03:35:37 +0000277 AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
Ayman Musac5490e52017-05-15 11:30:54 +0000278 // Bitcast %addr fron i8* to EltTy*
279 Type *NewPtrType =
280 EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
281 Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
282 unsigned VectorWidth = VecType->getNumElements();
283
Craig Topper8b4f0e12018-09-27 22:31:42 +0000284 if (isConstantIntVector(Mask)) {
Ayman Musac5490e52017-05-15 11:30:54 +0000285 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Craig Topperdfc0f282018-09-27 21:28:46 +0000286 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
Ayman Musac5490e52017-05-15 11:30:54 +0000287 continue;
288 Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
289 Value *Gep =
290 Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
291 Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
292 }
293 CI->eraseFromParent();
294 return;
295 }
296
297 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Ayman Musac5490e52017-05-15 11:30:54 +0000298 // Fill the "else" block, created in the previous iteration
299 //
300 // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
Craig Topper04236812018-09-27 18:01:48 +0000301 // br i1 %mask_1, label %cond.store, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000302 //
303 Value *Predicate =
304 Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
Ayman Musac5490e52017-05-15 11:30:54 +0000305
306 // Create "cond" block
307 //
308 // %OneElt = extractelement <16 x i32> %Src, i32 Idx
309 // %EltAddr = getelementptr i32* %1, i32 0
310 // %store i32 %OneElt, i32* %EltAddr
311 //
312 BasicBlock *CondBlock =
313 IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
314 Builder.SetInsertPoint(InsertPt);
315
316 Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
317 Value *Gep =
318 Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
319 Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
320
321 // Create "else" block, fill it in the next iteration
322 BasicBlock *NewIfBlock =
323 CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
324 Builder.SetInsertPoint(InsertPt);
325 Instruction *OldBr = IfBlock->getTerminator();
Craig Topper04236812018-09-27 18:01:48 +0000326 BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
Ayman Musac5490e52017-05-15 11:30:54 +0000327 OldBr->eraseFromParent();
328 IfBlock = NewIfBlock;
329 }
330 CI->eraseFromParent();
331}
332
333// Translate a masked gather intrinsic like
334// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
335// <16 x i1> %Mask, <16 x i32> %Src)
336// to a chain of basic blocks, with loading element one-by-one if
337// the appropriate mask bit is set
338//
Craig Topper49dad8b2018-09-27 21:28:39 +0000339// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
340// %Mask0 = extractelement <16 x i1> %Mask, i32 0
341// br i1 %Mask0, label %cond.load, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000342//
343// cond.load:
Craig Topper49dad8b2018-09-27 21:28:39 +0000344// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
345// %Load0 = load i32, i32* %Ptr0, align 4
346// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
Ayman Musac5490e52017-05-15 11:30:54 +0000347// br label %else
348//
349// else:
Craig Topper49dad8b2018-09-27 21:28:39 +0000350// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
351// %Mask1 = extractelement <16 x i1> %Mask, i32 1
352// br i1 %Mask1, label %cond.load1, label %else2
Ayman Musac5490e52017-05-15 11:30:54 +0000353//
354// cond.load1:
Craig Topper49dad8b2018-09-27 21:28:39 +0000355// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
356// %Load1 = load i32, i32* %Ptr1, align 4
357// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
Ayman Musac5490e52017-05-15 11:30:54 +0000358// br label %else2
359// . . .
Craig Topper49dad8b2018-09-27 21:28:39 +0000360// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
Ayman Musac5490e52017-05-15 11:30:54 +0000361// ret <16 x i32> %Result
362static void scalarizeMaskedGather(CallInst *CI) {
363 Value *Ptrs = CI->getArgOperand(0);
364 Value *Alignment = CI->getArgOperand(1);
365 Value *Mask = CI->getArgOperand(2);
366 Value *Src0 = CI->getArgOperand(3);
367
Craig Topper10ec0212018-09-27 22:31:40 +0000368 VectorType *VecType = cast<VectorType>(CI->getType());
Ayman Musac5490e52017-05-15 11:30:54 +0000369
370 IRBuilder<> Builder(CI->getContext());
371 Instruction *InsertPt = CI;
372 BasicBlock *IfBlock = CI->getParent();
Ayman Musac5490e52017-05-15 11:30:54 +0000373 Builder.SetInsertPoint(InsertPt);
374 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
375
376 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
377
Ayman Musac5490e52017-05-15 11:30:54 +0000378 // The result vector
Craig Topper6911bfe2018-09-27 21:28:59 +0000379 Value *VResult = Src0;
Ayman Musac5490e52017-05-15 11:30:54 +0000380 unsigned VectorWidth = VecType->getNumElements();
381
382 // Shorten the way if the mask is a vector of constants.
Craig Topper8b4f0e12018-09-27 22:31:42 +0000383 if (isConstantIntVector(Mask)) {
Ayman Musac5490e52017-05-15 11:30:54 +0000384 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Craig Topperdfc0f282018-09-27 21:28:46 +0000385 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
Ayman Musac5490e52017-05-15 11:30:54 +0000386 continue;
387 Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
388 "Ptr" + Twine(Idx));
389 LoadInst *Load =
390 Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
391 VResult = Builder.CreateInsertElement(
392 VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx));
393 }
Craig Topper6911bfe2018-09-27 21:28:59 +0000394 CI->replaceAllUsesWith(VResult);
Ayman Musac5490e52017-05-15 11:30:54 +0000395 CI->eraseFromParent();
396 return;
397 }
398
Ayman Musac5490e52017-05-15 11:30:54 +0000399 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Ayman Musac5490e52017-05-15 11:30:54 +0000400 // Fill the "else" block, created in the previous iteration
401 //
402 // %Mask1 = extractelement <16 x i1> %Mask, i32 1
Craig Topper04236812018-09-27 18:01:48 +0000403 // br i1 %Mask1, label %cond.load, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000404 //
Ayman Musac5490e52017-05-15 11:30:54 +0000405
406 Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
407 "Mask" + Twine(Idx));
Ayman Musac5490e52017-05-15 11:30:54 +0000408
409 // Create "cond" block
410 //
411 // %EltAddr = getelementptr i32* %1, i32 0
412 // %Elt = load i32* %EltAddr
413 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
414 //
Craig Topper4104c002018-10-30 20:33:58 +0000415 BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
Ayman Musac5490e52017-05-15 11:30:54 +0000416 Builder.SetInsertPoint(InsertPt);
417
418 Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
419 "Ptr" + Twine(Idx));
420 LoadInst *Load =
421 Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
Craig Topper6911bfe2018-09-27 21:28:59 +0000422 Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
423 Builder.getInt32(Idx),
424 "Res" + Twine(Idx));
Ayman Musac5490e52017-05-15 11:30:54 +0000425
426 // Create "else" block, fill it in the next iteration
427 BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
428 Builder.SetInsertPoint(InsertPt);
429 Instruction *OldBr = IfBlock->getTerminator();
Craig Topper04236812018-09-27 18:01:48 +0000430 BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
Ayman Musac5490e52017-05-15 11:30:54 +0000431 OldBr->eraseFromParent();
Craig Topper4104c002018-10-30 20:33:58 +0000432 BasicBlock *PrevIfBlock = IfBlock;
Ayman Musac5490e52017-05-15 11:30:54 +0000433 IfBlock = NewIfBlock;
Craig Topper6911bfe2018-09-27 21:28:59 +0000434
435 PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
436 Phi->addIncoming(NewVResult, CondBlock);
437 Phi->addIncoming(VResult, PrevIfBlock);
438 VResult = Phi;
Ayman Musac5490e52017-05-15 11:30:54 +0000439 }
440
Craig Topper6911bfe2018-09-27 21:28:59 +0000441 CI->replaceAllUsesWith(VResult);
Ayman Musac5490e52017-05-15 11:30:54 +0000442 CI->eraseFromParent();
443}
444
445// Translate a masked scatter intrinsic, like
446// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
447// <16 x i1> %Mask)
448// to a chain of basic blocks, that stores element one-by-one if
449// the appropriate mask bit is set.
450//
Craig Topper49dad8b2018-09-27 21:28:39 +0000451// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
452// %Mask0 = extractelement <16 x i1> %Mask, i32 0
453// br i1 %Mask0, label %cond.store, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000454//
455// cond.store:
Craig Topper49dad8b2018-09-27 21:28:39 +0000456// %Elt0 = extractelement <16 x i32> %Src, i32 0
457// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
458// store i32 %Elt0, i32* %Ptr0, align 4
Ayman Musac5490e52017-05-15 11:30:54 +0000459// br label %else
460//
461// else:
Craig Topper49dad8b2018-09-27 21:28:39 +0000462// %Mask1 = extractelement <16 x i1> %Mask, i32 1
463// br i1 %Mask1, label %cond.store1, label %else2
Ayman Musac5490e52017-05-15 11:30:54 +0000464//
465// cond.store1:
Craig Topper49dad8b2018-09-27 21:28:39 +0000466// %Elt1 = extractelement <16 x i32> %Src, i32 1
467// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
468// store i32 %Elt1, i32* %Ptr1, align 4
Ayman Musac5490e52017-05-15 11:30:54 +0000469// br label %else2
470// . . .
471static void scalarizeMaskedScatter(CallInst *CI) {
472 Value *Src = CI->getArgOperand(0);
473 Value *Ptrs = CI->getArgOperand(1);
474 Value *Alignment = CI->getArgOperand(2);
475 Value *Mask = CI->getArgOperand(3);
476
477 assert(isa<VectorType>(Src->getType()) &&
478 "Unexpected data type in masked scatter intrinsic");
479 assert(isa<VectorType>(Ptrs->getType()) &&
480 isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
481 "Vector of pointers is expected in masked scatter intrinsic");
482
483 IRBuilder<> Builder(CI->getContext());
484 Instruction *InsertPt = CI;
485 BasicBlock *IfBlock = CI->getParent();
486 Builder.SetInsertPoint(InsertPt);
487 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
488
489 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
490 unsigned VectorWidth = Src->getType()->getVectorNumElements();
491
492 // Shorten the way if the mask is a vector of constants.
Craig Topper8b4f0e12018-09-27 22:31:42 +0000493 if (isConstantIntVector(Mask)) {
Ayman Musac5490e52017-05-15 11:30:54 +0000494 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Craig Topperdfc0f282018-09-27 21:28:46 +0000495 if (cast<ConstantVector>(Mask)->getAggregateElement(Idx)->isNullValue())
Ayman Musac5490e52017-05-15 11:30:54 +0000496 continue;
497 Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
498 "Elt" + Twine(Idx));
499 Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
500 "Ptr" + Twine(Idx));
501 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
502 }
503 CI->eraseFromParent();
504 return;
505 }
Craig Topperdfe460d2018-09-27 21:28:41 +0000506
Ayman Musac5490e52017-05-15 11:30:54 +0000507 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
508 // Fill the "else" block, created in the previous iteration
509 //
Craig Topper04236812018-09-27 18:01:48 +0000510 // %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
511 // br i1 %Mask1, label %cond.store, label %else
Ayman Musac5490e52017-05-15 11:30:54 +0000512 //
513 Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
514 "Mask" + Twine(Idx));
Ayman Musac5490e52017-05-15 11:30:54 +0000515
516 // Create "cond" block
517 //
Craig Topper49dad8b2018-09-27 21:28:39 +0000518 // %Elt1 = extractelement <16 x i32> %Src, i32 1
519 // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
520 // %store i32 %Elt1, i32* %Ptr1
Ayman Musac5490e52017-05-15 11:30:54 +0000521 //
522 BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
523 Builder.SetInsertPoint(InsertPt);
524
525 Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
526 "Elt" + Twine(Idx));
527 Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
528 "Ptr" + Twine(Idx));
529 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
530
531 // Create "else" block, fill it in the next iteration
532 BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
533 Builder.SetInsertPoint(InsertPt);
534 Instruction *OldBr = IfBlock->getTerminator();
Craig Topper04236812018-09-27 18:01:48 +0000535 BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
Ayman Musac5490e52017-05-15 11:30:54 +0000536 OldBr->eraseFromParent();
537 IfBlock = NewIfBlock;
538 }
539 CI->eraseFromParent();
540}
541
542bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
Ayman Musac5490e52017-05-15 11:30:54 +0000543 bool EverMadeChange = false;
544
545 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
546
547 bool MadeChange = true;
548 while (MadeChange) {
549 MadeChange = false;
550 for (Function::iterator I = F.begin(); I != F.end();) {
551 BasicBlock *BB = &*I++;
552 bool ModifiedDTOnIteration = false;
553 MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
554
555 // Restart BB iteration if the dominator tree of the Function was changed
556 if (ModifiedDTOnIteration)
557 break;
558 }
559
560 EverMadeChange |= MadeChange;
561 }
562
563 return EverMadeChange;
564}
565
566bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
567 bool MadeChange = false;
568
569 BasicBlock::iterator CurInstIterator = BB.begin();
570 while (CurInstIterator != BB.end()) {
571 if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
572 MadeChange |= optimizeCallInst(CI, ModifiedDT);
573 if (ModifiedDT)
574 return true;
575 }
576
577 return MadeChange;
578}
579
580bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
581 bool &ModifiedDT) {
Ayman Musac5490e52017-05-15 11:30:54 +0000582 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
583 if (II) {
584 switch (II->getIntrinsicID()) {
585 default:
586 break;
Eugene Zelenkofa57bd02017-09-27 23:26:01 +0000587 case Intrinsic::masked_load:
Ayman Musac5490e52017-05-15 11:30:54 +0000588 // Scalarize unsupported vector masked load
589 if (!TTI->isLegalMaskedLoad(CI->getType())) {
590 scalarizeMaskedLoad(CI);
591 ModifiedDT = true;
592 return true;
593 }
594 return false;
Eugene Zelenkofa57bd02017-09-27 23:26:01 +0000595 case Intrinsic::masked_store:
Ayman Musac5490e52017-05-15 11:30:54 +0000596 if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
597 scalarizeMaskedStore(CI);
598 ModifiedDT = true;
599 return true;
600 }
601 return false;
Eugene Zelenkofa57bd02017-09-27 23:26:01 +0000602 case Intrinsic::masked_gather:
Ayman Musac5490e52017-05-15 11:30:54 +0000603 if (!TTI->isLegalMaskedGather(CI->getType())) {
604 scalarizeMaskedGather(CI);
605 ModifiedDT = true;
606 return true;
607 }
608 return false;
Eugene Zelenkofa57bd02017-09-27 23:26:01 +0000609 case Intrinsic::masked_scatter:
Ayman Musac5490e52017-05-15 11:30:54 +0000610 if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
611 scalarizeMaskedScatter(CI);
612 ModifiedDT = true;
613 return true;
614 }
615 return false;
616 }
Ayman Musac5490e52017-05-15 11:30:54 +0000617 }
618
619 return false;
620}