//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/Support/GICHelper.h"

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/CommandLine.h"

#include "isl/aff.h"
#include "isl/set.h"

using namespace llvm;
using namespace polly;

static cl::opt<bool>
Aligned("enable-polly-aligned",
        cl::desc("Assume aligned memory accesses."), cl::Hidden,
        cl::value_desc("Assume aligned memory accesses if true"),
        cl::init(false), cl::ZeroOrMore);

static cl::opt<bool>
SCEVCodegen("polly-codegen-scev",
            cl::desc("Use SCEV based code generation."), cl::Hidden,
            cl::init(false), cl::ZeroOrMore);

/// The SCEVRewriter takes a scalar evolution expression and updates the
/// following components:
///
/// - SCEVUnknown
///
///   Values referenced in SCEVUnknown subexpressions are looked up in two
///   Value to Value maps (GlobalMap and BBMap). If they are found, they are
///   replaced by a reference to the value they map to.
///
/// - SCEVAddRecExpr
///
///   Based on a Loop -> Value map {Loop_1: %Value}, an expression
///   {%Base, +, %Step}<Loop_1> is rewritten to %Base + %Value * %Step.
///   AddRecExprs with more than two operands cannot be translated.
///
/// FIXME: The comment above is not yet reality. At the moment we derive
/// %Value by looking up the canonical IV of the loop and by defining
/// %Value = GlobalMap[%IV]. This needs to be changed to remove the need for
/// canonical induction variables.
///
///
/// How can this be used?
/// =====================
///
/// SCEVRewrite based code generation works on virtually independent blocks.
/// This means we do not run the independent blocks pass to rewrite scalar
/// instructions, but just ignore instructions that we can analyze with scalar
/// evolution. Virtually independent blocks are blocks that only reference the
/// following values:
///
/// o Values calculated within a basic block
/// o Values representable by SCEV
///
/// During code generation we can ignore all instructions except:
///
///   - Load instructions
///   - Instructions that reference operands already calculated within the
///     basic block.
///   - Store instructions
struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV*> {
public:
  static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
                             ValueMapT &GlobalMap, ValueMapT &BBMap) {
    SCEVRewriter Rewriter(S, SE, GlobalMap, BBMap);
    return Rewriter.visit(scev);
  }

  SCEVRewriter(Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap,
               ValueMapT &BBMap) : S(S), SE(SE), GlobalMap(GlobalMap),
               BBMap(BBMap) {}

  const SCEV *visit(const SCEV *Expr) {
    // FIXME: The parameter handling is incorrect.
    //
    // Polly only detects parameters in access functions and loop iteration
    // counters, but it does not get parameters that are just used by
    // instructions within the basic block.
    //
    // There are two options to solve this:
    //  o Iterate over all instructions of the SCoP and find the actual
    //    parameters.
    //  o Just check within the SCEVRewriter if Values lie outside of the SCoP
    //    and detect parameters on the fly.
    //
    // This is especially important for OpenMP and GPGPU code generation, as
    // they require us to detect and possibly rewrite the corresponding
    // parameters.
    if (isl_id *Id = S.getIdForParam(Expr)) {
      isl_id_free(Id);
      return Expr;
    }

    return SCEVVisitor<SCEVRewriter, const SCEV*>::visit(Expr);
  }

  const SCEV *visitConstant(const SCEVConstant *Constant) {
    return Constant;
  }

  const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getTruncateExpr(Operand, Expr->getType());
  }

  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getZeroExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getSignExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getAddExpr(Operands);
  }

  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getMulExpr(Operands);
  }

  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
    return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
  }

  // Return a new induction variable if the loop is within the original SCoP
  // or NULL otherwise.
  Value *getNewIV(const Loop *L) {
    Value *IV = L->getCanonicalInductionVariable();
    if (!IV)
      return NULL;

    ValueMapT::iterator NewIV = GlobalMap.find(IV);

    if (NewIV == GlobalMap.end())
      return NULL;

    return NewIV->second;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    Value *IV;

    IV = getNewIV(Expr->getLoop());

    // The IV is not within the GlobalMap. So do not rewrite it and also do
    // not rewrite any descendants.
    if (!IV)
      return Expr;

    assert(Expr->getNumOperands() == 2
           && "An AddRecExpr with more than two operands cannot be rewritten.");

    const SCEV *Base, *Step, *IVExpr, *Product;

    Base = visit(Expr->getStart());
    Step = visit(Expr->getOperand(1));
    IVExpr = SE.getUnknown(IV);
    IVExpr = SE.getTruncateOrSignExtend(IVExpr, Step->getType());
    Product = SE.getMulExpr(Step, IVExpr);

    return SE.getAddExpr(Base, Product);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getSMaxExpr(Operands);
  }

  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getUMaxExpr(Operands);
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    Value *V = Expr->getValue();

    if (GlobalMap.count(V))
      return SE.getUnknown(GlobalMap[V]);

    if (BBMap.count(V))
      return SE.getUnknown(BBMap[V]);

    return Expr;
  }

private:
  Scop &S;
  ScalarEvolution &SE;
  ValueMapT &GlobalMap;
  ValueMapT &BBMap;
};

// Helper class to generate memory locations.
namespace {
class IslGenerator {
public:
  IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) :
    Builder(Builder), IVS(IVS) {}
  Value *generateIslInt(__isl_take isl_int Int);
  Value *generateIslAff(__isl_take isl_aff *Aff);
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  IRBuilder<> &Builder;
  std::vector<Value *> &IVS;
  static int mergeIslAffValues(__isl_take isl_set *Set, __isl_take isl_aff *Aff,
                               void *User);
};
}

Value *IslGenerator::generateIslInt(isl_int Int) {
  mpz_t IntMPZ;
  mpz_init(IntMPZ);
  isl_int_get_gmp(Int, IntMPZ);
  Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
  mpz_clear(IntMPZ);
  return IntValue;
}

Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
  Value *Result;
  Value *ConstValue;
  isl_int ConstIsl;

  isl_int_init(ConstIsl);
  isl_aff_get_constant(Aff, &ConstIsl);
  ConstValue = generateIslInt(ConstIsl);
  Type *Ty = Builder.getInt64Ty();

  // FIXME: We should give the constant and coefficients the right type. Here
  // we force it into i64.
  Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);

  unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);

  assert((IVS.size() == NbInputDims) && "The dimension of the induction "
         "variables must match the dimension of the affine space.");

  isl_int CoefficientIsl;
  isl_int_init(CoefficientIsl);

  for (unsigned int i = 0; i < NbInputDims; ++i) {
    Value *CoefficientValue;
    isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);

    if (isl_int_is_zero(CoefficientIsl))
      continue;

    CoefficientValue = generateIslInt(CoefficientIsl);
    CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
    Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
    Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
    Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
  }

  isl_int_clear(CoefficientIsl);
  isl_int_clear(ConstIsl);
  isl_aff_free(Aff);

  return Result;
}

int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
                                    __isl_take isl_aff *Aff, void *User) {
  IslGenInfo *GenInfo = (IslGenInfo *)User;

  assert((GenInfo->Result == NULL) && "Result is already set. "
         "Currently only a single isl_aff is supported.");
  assert(isl_set_plain_is_universe(Set)
         && "Code generation failed because the set is not universe");

  GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);

  isl_set_free(Set);
  return 0;
}

Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
  IslGenInfo User;
  User.Result = NULL;
  User.Generator = this;
  isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
  assert(User.Result && "Code generation for isl_pw_aff failed");

  isl_pw_aff_free(PwAff);
  return User.Result;
}

BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P):
  Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {}

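// Check whether an instruction can be ignored during code generation because
// its value can be recomputed from a scalar evolution expression. This is only
// the case when SCEV based code generation is enabled, the instruction is
// SCEVable, and its SCEV is more than just a reference to the instruction
// itself.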
bool BlockGenerator::isSCEVIgnore(const Instruction *Inst) {
  if (SCEVCodegen && SE.isSCEVable(Inst->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Instruction*>(Inst)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Scev)) {
          if (Unknown->getValue() != Inst)
            return true;
        } else {
          return true;
        }
      }

  return false;
}

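// Get the copy of 'Old' that is valid in the generated code. Constants are
// returned unchanged; otherwise GlobalMap is consulted first, then BBMap, and
// finally (with -polly-codegen-scev) a value is regenerated from a rewritten
// SCEV expression. Values defined inside the SCoP that cannot be remapped are
// reported as NULL; everything else is treated as SCoP-invariant and returned
// as-is.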
Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap) {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
  if (isa<Constant>(Old))
    return const_cast<Value*>(Old);

  if (GlobalMap.count(Old)) {
    Value *New = GlobalMap[Old];

    if (Old->getType()->getScalarSizeInBits()
        < New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (BBMap.count(Old)) {
    return BBMap[Old];
  }

  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Value*>(Old)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = SCEVRewriter::rewrite(Scev,
                                                    *Statement.getParent(), SE,
                                                    GlobalMap, BBMap);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }

  // 'Old' is within the original SCoP, but was not rewritten.
  //
  // Such values appear if they only compute information that is already
  // available in the polyhedral description (e.g. an induction variable
  // increment). They can be safely ignored.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (Statement.getParent()->getRegion().contains(Inst->getParent()))
      return NULL;

  // Everything else is probably a scop-constant value defined as global,
  // function parameter or an instruction not within the scop.
  return const_cast<Value*>(Old);
}

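// Create a scalar copy of 'Inst' in the generated basic block and remap its
// operands with getNewValue(). If an operand cannot be remapped, the copy is
// dropped; this must never happen for store instructions.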
void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap) {
  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    Value *OldOperand = *OI;
    Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst)
             && "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

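// Build the index vector for a GEP that implements a single dimensional
// access relation. The offset is computed from the access relation via the
// IslGenerator, using the new induction variables of the statement.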
std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
  __isl_keep isl_map *AccessRelation, Value *BaseAddress,
  ValueMapT &BBMap, ValueMapT &GlobalMap) {

  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1)
         && "Only single dimensional access functions supported");

  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap);
    IVS.push_back(NewIV);
  }

  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

  std::vector<Value*> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}

Value *BlockGenerator::getNewAccessOperand(
  __isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
  ValueMapT &BBMap, ValueMapT &GlobalMap) {
  std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation,
                                                        BaseAddress,
                                                        BBMap, GlobalMap);
  Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray,
                                        "p_newarrayidx_");
  return NewOperand;
}

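// Compute the pointer that the copied instruction should access. If a new
// access relation was set for this memory access, a new address is derived
// from it; otherwise the original pointer operand is simply remapped.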
Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
                                                const Value *Pointer,
                                                ValueMapT &BBMap,
                                                ValueMapT &GlobalMap) {
  MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation)
         && "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
  } else {
    Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr());
    NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress,
                                     BBMap, GlobalMap);
  }

  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}

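// Generate a scalar load from the (possibly changed) memory location that
// 'Load' accesses in the generated code.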
Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
                                          ValueMapT &BBMap,
                                          ValueMapT &GlobalMap) {
  const Value *Pointer = Load->getPointerOperand();
  const Instruction *Inst = dyn_cast<Instruction>(Load);
  Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap);
  Value *ScalarLoad = Builder.CreateLoad(NewPointer,
                                         Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

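// Generate a scalar store of the remapped value operand to the (possibly
// changed) memory location that 'Store' accesses in the generated code.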
Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
                                           ValueMapT &BBMap,
                                           ValueMapT &GlobalMap) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap,
                                               GlobalMap);
  Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);

  return Builder.CreateStore(ValueOperand, NewPointer);
}

void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
                                     ValueMapT &GlobalMap) {
  // Terminator instructions control the flow of execution. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap);
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap);
    return;
  }

  copyInstScalar(Inst, BBMap, GlobalMap);
}

void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, BBMap, GlobalMap);
}

VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
                                           VectorValueMapT &GlobalMaps,
                                           ScopStmt &Stmt,
                                           __isl_keep isl_map *Schedule,
                                           Pass *P)
  : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement domain provided");
}

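// Get a vector that contains the values of 'Old' for all vector lanes. If no
// vector value exists yet, it is built by inserting the per-lane scalar
// copies into an undef vector and cached in VectorMap.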
Value *VectorBlockGenerator::getVectorValue(const Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps) {
  if (VectorMap.count(Old))
    return VectorMap[Old];

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(Vector,
                                         getNewValue(Old,
                                                     ScalarMaps[Lane],
                                                     GlobalMaps[Lane]),
                                         Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}

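// Load a full vector from consecutive memory locations (stride one access) by
// casting the scalar pointer to a vector pointer and issuing a single vector
// load.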
Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
                                                   ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           "vector_ptr");
  LoadInst *VecLoad = Builder.CreateLoad(VectorPtr,
                                         Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  return VecLoad;
}

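// Load a single scalar value (stride zero access) and splat it across all
// vector lanes with a shufflevector.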
Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad = Builder.CreateLoad(VectorPtr,
                                            Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector =
    Constant::getNullValue(VectorType::get(Builder.getInt32Ty(),
                                           getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad,
                                                  SplatVector,
                                                  Load->getName()
                                                  + "_p_splat");
  return VectorLoad;
}

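// Load the values of a non-consecutive (unknown stride) access one element at
// a time and insert them into a vector.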
Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
                                                       VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
    dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
    Value *ScalarLoad = Builder.CreateLoad(NewPointer,
                                           Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(Vector, ScalarLoad,
                                         Builder.getInt32(i),
                                         Load->getName() + "_p_vec_");
  }

  return Vector;
}

void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
                                               GlobalMaps[i]);
    return;
  }

  MemoryAccess &Access = Statement.getAccessFor(Load);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap,
                                     ScalarMaps);

  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
  NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

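// Copy a store instruction. Stride one stores are emitted as a single vector
// store; all other stores are scalarized by extracting each lane from the
// value vector and storing it individually.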
void VectorBlockGenerator::copyStore(const StoreInst *Store,
                                     ValueMapT &VectorMap,
                                     VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();

  MemoryAccess &Access = Statement.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
                                 ScalarMaps);

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);

    Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                             "vector_ptr");
    StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      Store->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI)
    if (VectorMap.count(*OI))
      return true;
  return false;
}

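// For each operand of 'Inst' that is available as a vector, extract the
// scalar value of every lane into the per-lane scalar maps. Returns true if
// at least one vector operand was found.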
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar element has been extracted, all scalar elements have
      // already been extracted by the code here. So there is no need to check
      // for the existence of all of them.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as a vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() {
  return GlobalMaps.size();
}

void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions control the flow of execution. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}

void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // in the vector map once (as it is calculating >vectorwidth< values at a
  // time) or, if the values are calculated using scalar operations, it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
       II != IE; ++II)
    copyInstruction(II, VectorBlockMap, ScalarBlockMap);
883}