//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/Support/GICHelper.h"

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/CommandLine.h"

#include "isl/aff.h"
#include "isl/set.h"

using namespace llvm;
using namespace polly;

static cl::opt<bool>
Aligned("enable-polly-aligned",
        cl::desc("Assume aligned memory accesses."), cl::Hidden,
        cl::init(false), cl::ZeroOrMore);

static cl::opt<bool>
SCEVCodegen("polly-codegen-scev",
            cl::desc("Use SCEV based code generation."), cl::Hidden,
            cl::init(false), cl::ZeroOrMore);

/// The SCEVRewriter takes a scalar evolution expression and updates the
/// following components:
///
/// - SCEVUnknown
///
///   Values referenced in SCEVUnknown subexpressions are looked up in
///   two Value to Value maps (GlobalMap and BBMap). If they are found they
///   are replaced by a reference to the value they map to.
///
/// - SCEVAddRecExpr
///
///   Based on a Loop -> Value map {Loop_1: %Value}, an expression
///   {%Base, +, %Step}<Loop_1> is rewritten to %Base + %Value * %Step.
///   AddRecExprs with more than two operands cannot be translated.
///
/// FIXME: The comment above is not yet reality. At the moment we derive
/// %Value by looking up the canonical IV of the loop and by defining
/// %Value = GlobalMap[%IV]. This needs to be changed to remove the need for
/// canonical induction variables.
///
/// How can this be used?
/// =====================
///
/// SCEVRewriter based code generation works on virtually independent blocks.
/// This means we do not run the independent blocks pass to rewrite scalar
/// instructions, but just ignore instructions that we can analyze with scalar
/// evolution. Virtually independent blocks are blocks that only reference the
/// following values:
///
///   o Values calculated within a basic block
///   o Values representable by SCEV
///
/// During code generation we can then ignore all instructions except:
///
///   - Load instructions
///   - Instructions that reference operands already calculated within the
///     basic block
///   - Store instructions
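///
/// As a sketch of the intended AddRecExpr rewrite (the name %polly.indvar is
/// made up for illustration; it stands for whatever new loop counter was
/// registered in GlobalMap for the loop):
///
///   {%base, +, %step}<%for.body>   becomes   %base + %polly.indvar * %step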
struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV*> {
public:
  static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
                             ValueMapT &GlobalMap, ValueMapT &BBMap) {
    SCEVRewriter Rewriter(S, SE, GlobalMap, BBMap);
    return Rewriter.visit(scev);
  }

  SCEVRewriter(Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap,
               ValueMapT &BBMap) : S(S), SE(SE), GlobalMap(GlobalMap),
               BBMap(BBMap) {}

  const SCEV *visit(const SCEV *Expr) {
    // FIXME: The parameter handling is incorrect.
    //
    // Polly only detects parameters in access functions and loop iteration
    // counters, but it misses parameters that are only used by instructions
    // within the basic block.
    //
    // There are two options to solve this:
    //  o Iterate over all instructions of the SCoP and find the actual
    //    parameters.
    //  o Just check within the SCEVRewriter if Values lie outside of the SCoP
    //    and detect parameters on the fly.
    //
    // This is especially important for OpenMP and GPGPU code generation, as
    // they require us to detect and possibly rewrite the corresponding
    // parameters.
    if (isl_id *Id = S.getIdForParam(Expr)) {
      isl_id_free(Id);
      return Expr;
    }

    return SCEVVisitor<SCEVRewriter, const SCEV*>::visit(Expr);
  }

120
121 const SCEV *visitConstant(const SCEVConstant *Constant) {
122 return Constant;
123 }
124
125 const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
126 const SCEV *Operand = visit(Expr->getOperand());
127 return SE.getTruncateExpr(Operand, Expr->getType());
128 }
129
130 const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
131 const SCEV *Operand = visit(Expr->getOperand());
132 return SE.getZeroExtendExpr(Operand, Expr->getType());
133 }
134
135 const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
136 const SCEV *Operand = visit(Expr->getOperand());
137 return SE.getSignExtendExpr(Operand, Expr->getType());
138 }
139
140 const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
141 SmallVector<const SCEV *, 2> Operands;
142 for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
143 const SCEV *Operand = visit(Expr->getOperand(i));
144 Operands.push_back(Operand);
145 }
146
147 return SE.getAddExpr(Operands);
148 }
149
150 const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
151 SmallVector<const SCEV *, 2> Operands;
152 for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
153 const SCEV *Operand = visit(Expr->getOperand(i));
154 Operands.push_back(Operand);
155 }
156
157 return SE.getMulExpr(Operands);
158 }
159
160 const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
161 return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
162 }
163
164 // Return a new induction variable if the loop is within the original SCoP
165 // or NULL otherwise.
166 Value *getNewIV(const Loop *L) {
167 Value *IV = L->getCanonicalInductionVariable();
168 if (!IV)
169 return NULL;
170
171 ValueMapT::iterator NewIV = GlobalMap.find(IV);
172
173 if (NewIV == GlobalMap.end())
174 return NULL;
175
176 return NewIV->second;
177 }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    Value *IV;

    IV = getNewIV(Expr->getLoop());

    // The IV is not in GlobalMap, so do not rewrite it and do not rewrite any
    // of its descendants either.
    if (!IV)
      return Expr;

    assert(Expr->getNumOperands() == 2 &&
           "An AddRecExpr with more than two operands cannot be rewritten.");

    const SCEV *Base, *Step, *IVExpr, *Product;

    Base = visit(Expr->getStart());
    Step = visit(Expr->getOperand(1));
    IVExpr = SE.getUnknown(IV);
    IVExpr = SE.getTruncateOrSignExtend(IVExpr, Step->getType());
    Product = SE.getMulExpr(Step, IVExpr);

    return SE.getAddExpr(Base, Product);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getSMaxExpr(Operands);
  }

  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getUMaxExpr(Operands);
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    Value *V = Expr->getValue();

    if (GlobalMap.count(V))
      return SE.getUnknown(GlobalMap[V]);

    if (BBMap.count(V))
      return SE.getUnknown(BBMap[V]);

    return Expr;
  }

private:
  Scop &S;
  ScalarEvolution &SE;
  ValueMapT &GlobalMap;
  ValueMapT &BBMap;
};

// Helper class to generate memory locations.
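//
// For example, given IVS = {%i, %j}, an affine function like
// { [i, j] -> [(2i + 3j + 5)] } is lowered to IR along the lines of the
// following sketch (the exact instruction names depend on the builder state):
//
//   %p_mul_coeff  = mul i64 2, %i
//   %p_sum_coeff  = add i64 5, %p_mul_coeff
//   %p_mul_coeff1 = mul i64 3, %j
//   %p_sum_coeff2 = add i64 %p_sum_coeff, %p_mul_coeff1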
namespace {
class IslGenerator {
public:
  IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) :
    Builder(Builder), IVS(IVS) {}
  Value *generateIslInt(__isl_take isl_int Int);
  Value *generateIslAff(__isl_take isl_aff *Aff);
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  IRBuilder<> &Builder;
  std::vector<Value *> &IVS;
  static int mergeIslAffValues(__isl_take isl_set *Set,
                               __isl_take isl_aff *Aff, void *User);
};
}

Value *IslGenerator::generateIslInt(isl_int Int) {
  mpz_t IntMPZ;
  mpz_init(IntMPZ);
  isl_int_get_gmp(Int, IntMPZ);
  Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
  mpz_clear(IntMPZ);
  return IntValue;
}

Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
  Value *Result;
  Value *ConstValue;
  isl_int ConstIsl;

  isl_int_init(ConstIsl);
  isl_aff_get_constant(Aff, &ConstIsl);
  ConstValue = generateIslInt(ConstIsl);
  Type *Ty = Builder.getInt64Ty();

  // FIXME: We should give the constant and coefficients the right type. Here
  // we force it into i64.
  Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);

  unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);

  assert((IVS.size() == NbInputDims) &&
         "The number of induction variables must match the dimension of the "
         "affine space.");

  isl_int CoefficientIsl;
  isl_int_init(CoefficientIsl);

  for (unsigned int i = 0; i < NbInputDims; ++i) {
    Value *CoefficientValue;
    isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);

    if (isl_int_is_zero(CoefficientIsl))
      continue;

    CoefficientValue = generateIslInt(CoefficientIsl);
    CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
    Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
    Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
    Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
  }

  isl_int_clear(CoefficientIsl);
  isl_int_clear(ConstIsl);
  isl_aff_free(Aff);

  return Result;
}

int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
                                    __isl_take isl_aff *Aff, void *User) {
  IslGenInfo *GenInfo = (IslGenInfo *)User;

  assert((GenInfo->Result == NULL) &&
         "Result is already set. Currently only a single isl_aff is "
         "supported.");
  assert(isl_set_plain_is_universe(Set) &&
         "Code generation failed because the set is not the universe.");

  GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);

  isl_set_free(Set);
  return 0;
}

Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
  IslGenInfo User;
  User.Result = NULL;
  User.Generator = this;
  isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
  assert(User.Result && "Code generation for isl_pw_aff failed");

  isl_pw_aff_free(PwAff);
  return User.Result;
}

BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P) :
  Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {}

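// Check whether an instruction can be dropped during code generation, because
// SCEV-based code generation will recreate its value from a rewritten scalar
// evolution expression on demand. A SCEVUnknown that merely wraps the
// instruction itself carries no such information, so such instructions are
// still copied.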
bool BlockGenerator::isSCEVIgnore(const Instruction *Inst) {
  if (SCEVCodegen && SE.isSCEVable(Inst->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Instruction*>(Inst)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Scev)) {
          if (Unknown->getValue() != Inst)
            return true;
        } else {
          return true;
        }
      }

  return false;
}

Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap) {
  // We assume constants never change. This avoids map lookups for many calls
  // to this function.
  if (isa<Constant>(Old))
    return const_cast<Value*>(Old);

  if (GlobalMap.count(Old)) {
    Value *New = GlobalMap[Old];

    if (Old->getType()->getScalarSizeInBits()
        < New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (BBMap.count(Old)) {
    return BBMap[Old];
  }

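  // If the value is analyzable by scalar evolution, rewrite its SCEV against
  // the new loop structure and expand the rewritten expression to fresh IR at
  // the current insert point, instead of relying on a previously copied
  // instruction.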
  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Value*>(Old)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = SCEVRewriter::rewrite(Scev,
                                                    *Statement.getParent(), SE,
                                                    GlobalMap, BBMap);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }

  // 'Old' is within the original SCoP, but was not rewritten.
  //
  // Such values appear if they only calculate information that is already
  // available in the polyhedral description (e.g. an induction variable
  // increment). They can be safely ignored.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (Statement.getParent()->getRegion().contains(Inst->getParent()))
      return NULL;

  // Everything else is probably a scop-constant value defined as a global,
  // a function parameter or an instruction not within the scop.
  return const_cast<Value*>(Old);
}

void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap) {
  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    Value *OldOperand = *OI;
    Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst)
             && "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
  __isl_keep isl_map *AccessRelation, Value *BaseAddress,
  ValueMapT &BBMap, ValueMapT &GlobalMap) {

  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1)
         && "Only single dimensional access functions supported");

  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap);
    IVS.push_back(NewIV);
  }

  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

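  // Build the GEP index list [0, offset]: the leading zero steps through the
  // pointer to the underlying object, the second index selects the element.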
  std::vector<Value*> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}

Value *BlockGenerator::getNewAccessOperand(
  __isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
  ValueMapT &BBMap, ValueMapT &GlobalMap) {
  std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation,
                                                        BaseAddress,
                                                        BBMap, GlobalMap);
  Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray,
                                        "p_newarrayidx_");
  return NewOperand;
}

Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
                                                const Value *Pointer,
                                                ValueMapT &BBMap,
                                                ValueMapT &GlobalMap) {
  MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation)
         && "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
  } else {
    Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr());
    NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress,
                                     BBMap, GlobalMap);
  }

  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}

Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
                                          ValueMapT &BBMap,
                                          ValueMapT &GlobalMap) {
  const Value *Pointer = Load->getPointerOperand();
  Value *NewPointer = generateLocationAccessed(Load, Pointer, BBMap,
                                               GlobalMap);
  Value *ScalarLoad = Builder.CreateLoad(NewPointer,
                                         Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
                                           ValueMapT &BBMap,
                                           ValueMapT &GlobalMap) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap,
                                               GlobalMap);
  Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);

  return Builder.CreateStore(ValueOperand, NewPointer);
}

void BlockGenerator::copyInstruction(const Instruction *Inst,
                                     ValueMapT &BBMap, ValueMapT &GlobalMap) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap);
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap);
    return;
  }

  copyInstScalar(Inst, BBMap, GlobalMap);
}

void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, BBMap, GlobalMap);
}

VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
  VectorValueMapT &GlobalMaps, ScopStmt &Stmt, __isl_keep isl_set *Domain,
  Pass *P) : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps),
  Domain(Domain) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Domain && "No statement domain provided");
}

Value *VectorBlockGenerator::getVectorValue(const Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps) {
  if (VectorMap.count(Old))
    return VectorMap[Old];

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(Vector,
                                         getNewValue(Old,
                                                     ScalarMaps[Lane],
                                                     GlobalMaps[Lane]),
                                         Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}

Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
                                                   ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           "vector_ptr");
  LoadInst *VecLoad = Builder.CreateLoad(VectorPtr,
                                         Load->getName() + "_p_vec_full");
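  // Without the aligned flag we may not assume the vector type's natural
  // alignment, so conservatively request an 8-byte alignment for the load.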
  if (!Aligned)
    VecLoad->setAlignment(8);

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad = Builder.CreateLoad(VectorPtr,
                                            Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

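  // Splat the loaded value across all vector lanes: an all-zero shuffle mask
  // selects element 0 of the one-element load for every lane of the result.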
  Constant *SplatVector =
    Constant::getNullValue(VectorType::get(Builder.getInt32Ty(),
                                           getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad,
                                                  SplatVector,
                                                  Load->getName()
                                                  + "_p_splat");
  return VectorLoad;
}

Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
  VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
    dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
    Value *ScalarLoad = Builder.CreateLoad(NewPointer,
                                           Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(Vector, ScalarLoad,
                                         Builder.getInt32(i),
                                         Load->getName() + "_p_vec_");
  }

  return Vector;
}

void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
                                               GlobalMaps[i]);
    return;
  }

  MemoryAccess &Access = Statement.getAccessFor(Load);

  Value *NewLoad;
  if (Access.isStrideZero(isl_set_copy(Domain)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_set_copy(Domain)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap,
                                     ScalarMaps);

  assert(isa<CastInst>(Inst) && "Cannot generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
  NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero,
                                       NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

void VectorBlockGenerator::copyStore(const StoreInst *Store,
                                     ValueMapT &VectorMap,
                                     VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();

  MemoryAccess &Access = Statement.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
                                 ScalarMaps);

  if (Access.isStrideOne(isl_set_copy(Domain))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);

    Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                             "vector_ptr");
    StoreInst *NewStore = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      NewStore->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector,
                                                   Builder.getInt32(i));
      Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI)
    if (VectorMap.count(*OI))
      return true;
  return false;
}

bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar element was already extracted, all scalar elements have
      // already been extracted by the code below, so there is no need to
      // check for the existence of all of them.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < VectorWidth; VectorLane++)
    copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as a vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() {
  return GlobalMaps.size();
}

void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}

void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar
  // values and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vectorMap (if it calculates <vectorwidth> values at a time)
  // or, if its values are calculated using scalar operations, once in every
  // dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
       II != IE; ++II)
    copyInstruction(II, VectorBlockMap, ScalarBlockMap);
}
885}