blob: 1e5341a36e914f0afce280991aa82b4cb3154a71 [file] [log] [blame]
Hongbin Zheng3b11a162012-04-25 13:16:49 +00001//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the BlockGenerator and VectorBlockGenerator classes,
11// which generate sequential code and vectorized code for a polyhedral
12// statement, respectively.
13//
14//===----------------------------------------------------------------------===//
15
16#include "polly/ScopInfo.h"
Hongbin Zheng68794212012-05-06 10:22:19 +000017#include "polly/CodeGen/CodeGeneration.h"
Hongbin Zheng8a846612012-04-25 13:18:28 +000018#include "polly/CodeGen/BlockGenerators.h"
Hongbin Zheng3b11a162012-04-25 13:16:49 +000019#include "polly/Support/GICHelper.h"
20
Tobias Grossere71c6ab2012-04-27 16:36:14 +000021#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/ScalarEvolution.h"
23#include "llvm/Analysis/ScalarEvolutionExpander.h"
Hongbin Zheng3b11a162012-04-25 13:16:49 +000024#include "llvm/Transforms/Utils/BasicBlockUtils.h"
25#include "llvm/Support/CommandLine.h"
26
27#include "isl/aff.h"
28#include "isl/set.h"
29
30using namespace llvm;
31using namespace polly;
32
33static cl::opt<bool>
34Aligned("enable-polly-aligned",
35 cl::desc("Assumed aligned memory accesses."), cl::Hidden,
36 cl::value_desc("OpenMP code generation enabled if true"),
37 cl::init(false), cl::ZeroOrMore);
38
39static cl::opt<bool>
Tobias Grossere71c6ab2012-04-27 16:36:14 +000040SCEVCodegen("polly-codegen-scev",
41 cl::desc("Use SCEV based code generation."), cl::Hidden,
42 cl::init(false), cl::ZeroOrMore);
43
44/// The SCEVRewriter takes a scalar evolution expression and updates the
45/// following components:
46///
47/// - SCEVUnknown
48///
49/// Values referenced in SCEVUnknown subexpressions are looked up in
50/// two Value to Value maps (GlobalMap and BBMap). If they are found they are
51/// replaced by a reference to the value they map to.
52///
53/// - SCEVAddRecExpr
54///
55/// Based on a Loop -> Value map {Loop_1: %Value}, an expression
56/// {%Base, +, %Step}<Loop_1> is rewritten to %Base + %Value * %Step.
57/// AddRecExpr's with more than two operands can not be translated.
58///
59/// FIXME: The comment above is not yet reality. At the moment we derive
60/// %Value by looking up the canonical IV of the loop and by defining
61/// %Value = GlobalMap[%IV]. This needs to be changed to remove the need for
62/// canonical induction variables.
63///
64///
/// How can this be used?
/// =====================
67///
68/// SCEVRewrite based code generation works on virtually independent blocks.
69/// This means we do not run the independent blocks pass to rewrite scalar
70/// instructions, but just ignore instructions that we can analyze with scalar
71/// evolution. Virtually independent blocks are blocks that only reference the
72/// following values:
73///
74/// o Values calculated within a basic block
75/// o Values representable by SCEV
76///
77/// During code generation we can ignore all instructions:
78///
79/// - Ignore all instructions except:
80/// - Load instructions
81/// - Instructions that reference operands already calculated within the
82/// basic block.
83/// - Store instructions
84struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV*> {
85public:
86 static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
87 ValueMapT &GlobalMap, ValueMapT &BBMap) {
88 SCEVRewriter Rewriter(S, SE, GlobalMap, BBMap);
89 return Rewriter.visit(scev);
90 }
91
92 SCEVRewriter(Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap,
93 ValueMapT &BBMap) : S(S), SE(SE), GlobalMap(GlobalMap),
94 BBMap(BBMap) {}
95
96 const SCEV *visit(const SCEV *Expr) {
97 // FIXME: The parameter handling is incorrect.
98 //
99 // Polly does only detect parameters in Access function and loop iteration
100 // counters, but it does not get parameters that are just used by
101 // instructions within the basic block.
102 //
103 // There are two options to solve this:
104 // o Iterate over all instructions of the SCoP and find the actual
105 // parameters.
106 // o Just check within the SCEVRewriter if Values lay outside of the SCoP
107 // and detect parameters on the fly.
108 //
109 // This is especially important for OpenMP and GPGPU code generation, as
110 // they require us to detect and possibly rewrite the corresponding
111 // parameters.
112 if (isl_id *Id = S.getIdForParam(Expr)) {
113 isl_id_free(Id);
114 return Expr;
115 }
116
117
118 return SCEVVisitor<SCEVRewriter, const SCEV*>::visit(Expr);
119 }
120
121 const SCEV *visitConstant(const SCEVConstant *Constant) {
122 return Constant;
123 }
124
125 const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
126 const SCEV *Operand = visit(Expr->getOperand());
127 return SE.getTruncateExpr(Operand, Expr->getType());
128 }
129
130 const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
131 const SCEV *Operand = visit(Expr->getOperand());
132 return SE.getZeroExtendExpr(Operand, Expr->getType());
133 }
134
135 const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
136 const SCEV *Operand = visit(Expr->getOperand());
137 return SE.getSignExtendExpr(Operand, Expr->getType());
138 }
139
140 const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
141 SmallVector<const SCEV *, 2> Operands;
142 for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
143 const SCEV *Operand = visit(Expr->getOperand(i));
144 Operands.push_back(Operand);
145 }
146
147 return SE.getAddExpr(Operands);
148 }
149
150 const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
151 SmallVector<const SCEV *, 2> Operands;
152 for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
153 const SCEV *Operand = visit(Expr->getOperand(i));
154 Operands.push_back(Operand);
155 }
156
157 return SE.getMulExpr(Operands);
158 }
159
160 const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
161 return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
162 }
163
164 // Return a new induction variable if the loop is within the original SCoP
165 // or NULL otherwise.
166 Value *getNewIV(const Loop *L) {
167 Value *IV = L->getCanonicalInductionVariable();
168 if (!IV)
169 return NULL;
170
171 ValueMapT::iterator NewIV = GlobalMap.find(IV);
172
173 if (NewIV == GlobalMap.end())
174 return NULL;
175
176 return NewIV->second;
177 }
178
179 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
180 Value *IV;
181
182 IV = getNewIV(Expr->getLoop());
183
184 // The IV is not within the GlobalMaps. So do not rewrite it and also do
185 // not rewrite any descendants.
186 if (!IV)
187 return Expr;
188
189 assert(Expr->getNumOperands() == 2
190 && "An AddRecExpr with more than two operands can not be rewritten.");
191
192 const SCEV *Base, *Step, *IVExpr, *Product;
193
194 Base = visit(Expr->getStart());
195 Step = visit(Expr->getOperand(1));
196 IVExpr = SE.getUnknown(IV);
197 IVExpr = SE.getTruncateOrSignExtend(IVExpr, Step->getType());
198 Product = SE.getMulExpr(Step, IVExpr);
199
200 return SE.getAddExpr(Base, Product);
201 }
202
203 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
204 SmallVector<const SCEV *, 2> Operands;
205 for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
206 const SCEV *Operand = visit(Expr->getOperand(i));
207 Operands.push_back(Operand);
208 }
209
210 return SE.getSMaxExpr(Operands);
211 }
212
213 const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
214 SmallVector<const SCEV *, 2> Operands;
215 for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
216 const SCEV *Operand = visit(Expr->getOperand(i));
217 Operands.push_back(Operand);
218 }
219
220 return SE.getUMaxExpr(Operands);
221 }
222
223 const SCEV *visitUnknown(const SCEVUnknown *Expr) {
224 Value *V = Expr->getValue();
225
226 if (GlobalMap.count(V))
227 return SE.getUnknown(GlobalMap[V]);
228
229 if (BBMap.count(V))
230 return SE.getUnknown(BBMap[V]);
231
232 return Expr;
233 }
234
235private:
236 Scop &S;
237 ScalarEvolution &SE;
238 ValueMapT &GlobalMap;
239 ValueMapT &BBMap;
240};
241
Hongbin Zheng3b11a162012-04-25 13:16:49 +0000242// Helper class to generate memory location.
namespace {
// Emits LLVM IR computing the value of isl affine expressions, substituting
// the given new induction variables for the input dimensions.
class IslGenerator {
public:
  IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) :
    Builder(Builder), IVS(IVS) {}
  // Each generate* function consumes (__isl_take) its isl argument.
  Value *generateIslInt(__isl_take isl_int Int);
  Value *generateIslAff(__isl_take isl_aff *Aff);
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  // Callback context for mergeIslAffValues: the single result slot and a
  // back-pointer to the generator doing the emission.
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  IRBuilder<> &Builder;
  // New induction variable values, one per input dimension of the aff.
  std::vector<Value *> &IVS;
  static int mergeIslAffValues(__isl_take isl_set *Set,
                               __isl_take isl_aff *Aff, void *User);
};
}
264
265
266Value *IslGenerator::generateIslInt(isl_int Int) {
267 mpz_t IntMPZ;
268 mpz_init(IntMPZ);
269 isl_int_get_gmp(Int, IntMPZ);
270 Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
271 mpz_clear(IntMPZ);
272 return IntValue;
273}
274
// Emit IR evaluating the affine expression Aff: constant term plus the sum of
// coefficient * IV over all input dimensions. Takes ownership of Aff.
Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
  Value *Result;
  Value *ConstValue;
  isl_int ConstIsl;

  // Start the accumulation with the constant term.
  isl_int_init(ConstIsl);
  isl_aff_get_constant(Aff, &ConstIsl);
  ConstValue = generateIslInt(ConstIsl);
  Type *Ty = Builder.getInt64Ty();

  // FIXME: We should give the constant and coefficients the right type. Here
  // we force it into i64.
  Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);

  unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);

  assert((IVS.size() == NbInputDims) && "The Dimension of Induction Variables"
         "must match the dimension of the affine space.");

  isl_int CoefficientIsl;
  isl_int_init(CoefficientIsl);

  // Add coefficient * IV for each input dimension, skipping zero
  // coefficients to avoid emitting dead multiplications.
  for (unsigned int i = 0; i < NbInputDims; ++i) {
    Value *CoefficientValue;
    isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);

    if (isl_int_is_zero(CoefficientIsl))
      continue;

    CoefficientValue = generateIslInt(CoefficientIsl);
    CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
    Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
    Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
    Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
  }

  // Release the isl/GMP resources; Aff is __isl_take and owned here.
  isl_int_clear(CoefficientIsl);
  isl_int_clear(ConstIsl);
  isl_aff_free(Aff);

  return Result;
}
317
318int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
319 __isl_take isl_aff *Aff, void *User) {
320 IslGenInfo *GenInfo = (IslGenInfo *)User;
321
322 assert((GenInfo->Result == NULL) && "Result is already set."
323 "Currently only single isl_aff is supported");
324 assert(isl_set_plain_is_universe(Set)
325 && "Code generation failed because the set is not universe");
326
327 GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);
328
329 isl_set_free(Set);
330 return 0;
331}
332
333Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
334 IslGenInfo User;
335 User.Result = NULL;
336 User.Generator = this;
337 isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
338 assert(User.Result && "Code generation for isl_pw_aff failed");
339
340 isl_pw_aff_free(PwAff);
341 return User.Result;
342}
343
344
// Construct a generator for statement Stmt. ScalarEvolution is fetched from
// pass P; it is needed for SCEV based value rewriting in getNewValue().
BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P):
  Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {}
347
348bool BlockGenerator::isSCEVIgnore(const Instruction *Inst) {
349 if (SCEVCodegen && SE.isSCEVable(Inst->getType()))
350 if (const SCEV *Scev = SE.getSCEV(const_cast<Instruction*>(Inst)))
351 if (!isa<SCEVCouldNotCompute>(Scev)) {
352 if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Scev)) {
353 if (Unknown->getValue() != Inst)
354 return true;
355 } else {
356 return true;
357 }
358 }
359
360 return false;
361}
Hongbin Zheng3b11a162012-04-25 13:16:49 +0000362
363Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
364 ValueMapT &GlobalMap) {
365 // We assume constants never change.
366 // This avoids map lookups for many calls to this function.
367 if (isa<Constant>(Old))
368 return const_cast<Value*>(Old);
369
370 if (GlobalMap.count(Old)) {
371 Value *New = GlobalMap[Old];
372
373 if (Old->getType()->getScalarSizeInBits()
374 < New->getType()->getScalarSizeInBits())
375 New = Builder.CreateTruncOrBitCast(New, Old->getType());
376
377 return New;
378 }
379
380 if (BBMap.count(Old)) {
381 return BBMap[Old];
382 }
383
Tobias Grossere71c6ab2012-04-27 16:36:14 +0000384 if (SCEVCodegen && SE.isSCEVable(Old->getType()))
385 if (const SCEV *Scev = SE.getSCEV(const_cast<Value*>(Old)))
386 if (!isa<SCEVCouldNotCompute>(Scev)) {
387 const SCEV *NewScev = SCEVRewriter::rewrite(Scev,
388 *Statement.getParent(), SE,
389 GlobalMap, BBMap);
390 SCEVExpander Expander(SE, "polly");
391 Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
392 Builder.GetInsertPoint());
393
394 BBMap[Old] = Expanded;
395 return Expanded;
396 }
397
Hongbin Zheng3b11a162012-04-25 13:16:49 +0000398 // 'Old' is within the original SCoP, but was not rewritten.
399 //
400 // Such values appear, if they only calculate information already available in
401 // the polyhedral description (e.g. an induction variable increment). They
402 // can be safely ignored.
403 if (const Instruction *Inst = dyn_cast<Instruction>(Old))
404 if (Statement.getParent()->getRegion().contains(Inst->getParent()))
405 return NULL;
406
407 // Everything else is probably a scop-constant value defined as global,
408 // function parameter or an instruction not within the scop.
409 return const_cast<Value*>(Old);
410}
411
// Clone Inst into the current insertion point, rewriting every operand via
// getNewValue(). If an operand maps to NULL, the instruction only recomputes
// information the polyhedral description already provides and is dropped.
void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap) {
  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    Value *OldOperand = *OI;
    Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);

    // NULL means the operand (and hence this instruction) is redundant; the
    // still-unlinked clone is deleted. Stores must never be dropped.
    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst)
             && "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  // Record the mapping so later instructions in this block find the copy.
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}
438
// Build the GEP index list {0, offset} for a (single dimensional) access
// relation, evaluating the relation's affine offset with the statement's new
// induction variable values.
std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
  __isl_keep isl_map *AccessRelation, Value *BaseAddress,
  ValueMapT &BBMap, ValueMapT &GlobalMap) {

  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1)
         && "Only single dimensional access functions supported");

  // Collect the new value of each induction variable of this statement.
  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap);
    IVS.push_back(NewIV);
  }

  // Reduce the access relation to a piecewise affine offset in output
  // dimension 0 and emit IR evaluating it.
  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  // FIXME-style note carried from generateIslAff: offsets are forced to i64.
  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

  // Leading zero index steps through the pointer; the offset selects the
  // element.
  std::vector<Value*> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}
466
467Value *BlockGenerator::getNewAccessOperand(
468 __isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
469 ValueMapT &BBMap, ValueMapT &GlobalMap) {
470 std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation,
471 BaseAddress,
472 BBMap, GlobalMap);
473 Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray,
474 "p_newarrayidx_");
475 return NewOperand;
476}
477
// Compute the pointer a copied load/store should access. If an optimizer
// installed a new access relation, derive the address from it; otherwise just
// rewrite the original pointer operand.
Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
                                                const Value *Pointer,
                                                ValueMapT &BBMap,
                                                ValueMapT &GlobalMap) {
  MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  // NOTE(review): NewAccessRelation may be NULL here (see the branch below);
  // verify isl_map_has_equal_space tolerates a NULL argument.
  assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation)
         && "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
  } else {
    Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr());
    NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress,
                                     BBMap, GlobalMap);
  }

  // Both relations were returned as copies; release them.
  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}
503
504Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
505 ValueMapT &BBMap,
506 ValueMapT &GlobalMap) {
507 const Value *Pointer = Load->getPointerOperand();
508 const Instruction *Inst = dyn_cast<Instruction>(Load);
509 Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap);
510 Value *ScalarLoad = Builder.CreateLoad(NewPointer,
511 Load->getName() + "_p_scalar_");
512 return ScalarLoad;
513}
514
515Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
516 ValueMapT &BBMap,
517 ValueMapT &GlobalMap) {
518 const Value *Pointer = Store->getPointerOperand();
519 Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap,
520 GlobalMap);
521 Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);
522
523 return Builder.CreateStore(ValueOperand, NewPointer);
524}
525
// Copy a single instruction of the statement into the generated block,
// dispatching to the load/store helpers or the generic scalar copy.
void BlockGenerator::copyInstruction(const Instruction *Inst,
                                     ValueMapT &BBMap, ValueMapT &GlobalMap) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Under SCEV based code generation, skip instructions scalar evolution can
  // recompute on demand.
  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap);
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap);
    return;
  }

  // All remaining instructions are cloned with operand rewriting.
  copyInstScalar(Inst, BBMap, GlobalMap);
}
548
549
// Copy the statement's basic block to the current insertion point. A fresh
// block is split off so the copy has a block of its own, and a block-local
// value map tracks the clones created while copying.
void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, BBMap, GlobalMap);
}
563
// Construct a vectorizing generator. GlobalMaps carries one value map per
// vector lane (its size defines the vector width); Schedule is the scattering
// the stride checks are evaluated against.
VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
                                           VectorValueMapT &GlobalMaps,
                                           ScopStmt &Stmt,
                                           __isl_keep isl_map *Schedule,
                                           Pass *P)
  : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement domain provided");
}
Hongbin Zheng3b11a162012-04-25 13:16:49 +0000573
574Value *VectorBlockGenerator::getVectorValue(const Value *Old,
575 ValueMapT &VectorMap,
576 VectorValueMapT &ScalarMaps) {
577 if (VectorMap.count(Old))
578 return VectorMap[Old];
579
580 int Width = getVectorWidth();
581
582 Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
583
584 for (int Lane = 0; Lane < Width; Lane++)
585 Vector = Builder.CreateInsertElement(Vector,
586 getNewValue(Old,
587 ScalarMaps[Lane],
588 GlobalMaps[Lane]),
589 Builder.getInt32(Lane));
590
591 VectorMap[Old] = Vector;
592
593 return Vector;
594}
595
596Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
597 PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
598 assert(PointerTy && "PointerType expected");
599
600 Type *ScalarType = PointerTy->getElementType();
601 VectorType *VectorType = VectorType::get(ScalarType, Width);
602
603 return PointerType::getUnqual(VectorType);
604}
605
// Stride-one access: all lanes read consecutive elements, so emit a single
// wide load through a bitcast of the lane-0 address.
Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
                                                   ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
  // Lane 0 provides the base address of the consecutive element run.
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           "vector_ptr");
  LoadInst *VecLoad = Builder.CreateLoad(VectorPtr,
                                         Load->getName() + "_p_vec_full");
  // Without the -enable-polly-aligned promise, conservatively assume only
  // 8-byte alignment for the wide load.
  if (!Aligned)
    VecLoad->setAlignment(8);

  return VecLoad;
}
620
// Stride-zero access: every lane reads the same location, so load the scalar
// once and splat it across the vector with a zero shuffle mask.
Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  // A <1 x Ty>* pointer so the shuffle below can operate on vector values.
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr,
                                           Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  // All-zero mask: broadcast element 0 into every lane.
  Constant *SplatVector =
    Constant::getNullValue(VectorType::get(Builder.getInt32Ty(),
                                           getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad,
                                                  SplatVector,
                                                  Load->getName()
                                                  + "_p_splat");
  return VectorLoad;
}
644
645Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
646 VectorValueMapT &ScalarMaps) {
647 int VectorWidth = getVectorWidth();
648 const Value *Pointer = Load->getPointerOperand();
649 VectorType *VectorType = VectorType::get(
650 dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
651
652 Value *Vector = UndefValue::get(VectorType);
653
654 for (int i = 0; i < VectorWidth; i++) {
655 Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
656 Value *ScalarLoad = Builder.CreateLoad(NewPointer,
657 Load->getName() + "_p_scalar_");
658 Vector = Builder.CreateInsertElement(Vector, ScalarLoad,
659 Builder.getInt32(i),
660 Load->getName() + "_p_vec_");
661 }
662
663 return Vector;
664}
665
// Generate code for a load inside a vectorized statement, picking the
// cheapest form the access stride (relative to the schedule) permits.
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  // Fall back to one scalar load per lane if the chosen vectorizer needs
  // grouped unrolling or the loaded type cannot be a vector element.
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
                                               GlobalMaps[i]);
    return;
  }

  MemoryAccess &Access = Statement.getAccessFor(Load);

  // Stride 0 -> splat of one element; stride 1 -> single wide load;
  // otherwise -> per-lane gather.
  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}
689
690void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
691 ValueMapT &VectorMap,
692 VectorValueMapT &ScalarMaps) {
693 int VectorWidth = getVectorWidth();
694 Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap,
695 ScalarMaps);
696
697 assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
698
699 const CastInst *Cast = dyn_cast<CastInst>(Inst);
700 VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
701 VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
702}
703
704void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
705 ValueMapT &VectorMap,
706 VectorValueMapT &ScalarMaps) {
707 Value *OpZero = Inst->getOperand(0);
708 Value *OpOne = Inst->getOperand(1);
709
710 Value *NewOpZero, *NewOpOne;
711 NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
712 NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);
713
714 Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero,
715 NewOpOne,
716 Inst->getName() + "p_vec");
717 VectorMap[Inst] = NewInst;
718}
719
720void VectorBlockGenerator::copyStore(const StoreInst *Store,
721 ValueMapT &VectorMap,
722 VectorValueMapT &ScalarMaps) {
723 int VectorWidth = getVectorWidth();
724
725 MemoryAccess &Access = Statement.getAccessFor(Store);
726
727 const Value *Pointer = Store->getPointerOperand();
728 Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
729 ScalarMaps);
730
Sebastian Popa00a0292012-12-18 07:46:06 +0000731 if (Access.isStrideOne(isl_map_copy(Schedule))) {
Hongbin Zheng3b11a162012-04-25 13:16:49 +0000732 Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
733 Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);
734
735 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
736 "vector_ptr");
737 StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
738
739 if (!Aligned)
740 Store->setAlignment(8);
741 } else {
742 for (unsigned i = 0; i < ScalarMaps.size(); i++) {
743 Value *Scalar = Builder.CreateExtractElement(Vector,
744 Builder.getInt32(i));
745 Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
746 Builder.CreateStore(Scalar, NewPointer);
747 }
748 }
749}
750
751bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
752 ValueMapT &VectorMap) {
753 for (Instruction::const_op_iterator OI = Inst->op_begin(),
754 OE = Inst->op_end(); OI != OE; ++OI)
755 if (VectorMap.count(*OI))
756 return true;
757 return false;
758}
759
// For every operand of Inst that only exists as a vector value, extract its
// per-lane scalars into ScalarMaps so scalarized copies can use them.
// Returns true if any vector operand was found.
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}
791
792void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
793 ValueMapT &VectorMap,
794 VectorValueMapT &ScalarMaps) {
795 bool HasVectorOperand;
796 int VectorWidth = getVectorWidth();
797
798 HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
799
800 for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
801 copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
802
803 if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
804 return;
805
806 // Make the result available as vector value.
807 VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
808 Value *Vector = UndefValue::get(VectorType);
809
810 for (int i = 0; i < VectorWidth; i++)
811 Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
812 Builder.getInt32(i));
813
814 VectorMap[Inst] = Vector;
815}
816
// The vector width equals the number of per-lane global value maps.
int VectorBlockGenerator::getVectorWidth() {
  return GlobalMaps.size();
}
820
// Copy one instruction into the vectorized block: loads get dedicated
// handling; stores, casts and binary operators with vector operands are
// vectorized directly; everything else is scalarized per lane.
void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Under SCEV based code generation, skip instructions scalar evolution can
  // recompute on demand.
  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Falltrough: We generate scalar instructions, if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}
859
// Copy the statement's basic block into a freshly split block, generating
// vector code where possible (see the map discussion below).
void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // in the vector map once (as it is calculating >vectorwidth< values at a
  // time. Or (if the values are calculated using scalar operations), it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
       II != IE; ++II)
    copyInstruction(II, VectorBlockMap, ScalarBlockMap);
}
887}