blob: 2bc4a709b1df3aba9b7c6df39060033bf86b7eeb [file] [log] [blame]
/*
 * Copyright 2012, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
Stephen Hines6e9e89d2012-07-27 19:16:04 -070017#include "bcc/Assert.h"
Stephen Hinese198abe2012-07-27 18:05:41 -070018#include "bcc/Renderscript/RSTransforms.h"
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070019
20#include <cstdlib>
21
Stephen Hinesb730e232013-01-09 15:31:36 -080022#include <llvm/IR/DerivedTypes.h>
23#include <llvm/IR/Function.h>
24#include <llvm/IR/Instructions.h>
25#include <llvm/IR/IRBuilder.h>
26#include <llvm/IR/Module.h>
Zonr Changc72c4dd2012-04-12 15:38:53 +080027#include <llvm/Pass.h>
Stephen Hines7ae3a822012-09-14 19:24:58 -070028#include <llvm/Support/raw_ostream.h>
Stephen Hinesb730e232013-01-09 15:31:36 -080029#include <llvm/IR/DataLayout.h>
30#include <llvm/IR/Type.h>
Stephen Hinesdb169182012-01-05 18:46:36 -080031
Zonr Changc72c4dd2012-04-12 15:38:53 +080032#include "bcc/Config/Config.h"
Stephen Hinese198abe2012-07-27 18:05:41 -070033#include "bcc/Renderscript/RSInfo.h"
Zonr Changef73a242012-04-12 16:44:01 +080034#include "bcc/Support/Log.h"
Stephen Hinesdb169182012-01-05 18:46:36 -080035
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070036using namespace bcc;
37
Stephen Hinesdb169182012-01-05 18:46:36 -080038namespace {
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070039
/* RSForEachExpandPass - This pass operates on functions that are able to be
 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
 * ForEach-able function to be invoked over the appropriate data cells of the
 * input/output allocations (adjusting other relevant parameters as we go). We
 * support doing this for any ForEach-able compute kernels. The new function
 * name is the original function name followed by ".expand". Note that we
 * still generate code for the original function.
 */
48class RSForEachExpandPass : public llvm::ModulePass {
49private:
Stephen Hinesdb169182012-01-05 18:46:36 -080050 static char ID;
51
52 llvm::Module *M;
53 llvm::LLVMContext *C;
54
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070055 const RSInfo::ExportForeachFuncListTy &mFuncs;
Stephen Hinescc366e52012-02-21 17:22:04 -080056
Stephen Hines2b040862012-07-27 20:18:08 -070057 // Turns on optimization of allocation stride values.
58 bool mEnableStepOpt;
59
Stephen Hinescc366e52012-02-21 17:22:04 -080060 uint32_t getRootSignature(llvm::Function *F) {
Stephen Hinesdb169182012-01-05 18:46:36 -080061 const llvm::NamedMDNode *ExportForEachMetadata =
62 M->getNamedMetadata("#rs_export_foreach");
63
64 if (!ExportForEachMetadata) {
65 llvm::SmallVector<llvm::Type*, 8> RootArgTys;
66 for (llvm::Function::arg_iterator B = F->arg_begin(),
67 E = F->arg_end();
68 B != E;
69 ++B) {
70 RootArgTys.push_back(B->getType());
71 }
72
73 // For pre-ICS bitcode, we may not have signature information. In that
74 // case, we use the size of the RootArgTys to select the number of
75 // arguments.
76 return (1 << RootArgTys.size()) - 1;
77 }
78
Stephen Hines7ae3a822012-09-14 19:24:58 -070079 if (ExportForEachMetadata->getNumOperands() == 0) {
80 return 0;
81 }
82
Stephen Hines6e9e89d2012-07-27 19:16:04 -070083 bccAssert(ExportForEachMetadata->getNumOperands() > 0);
Stephen Hinesdb169182012-01-05 18:46:36 -080084
Stephen Hinescc366e52012-02-21 17:22:04 -080085 // We only handle the case for legacy root() functions here, so this is
86 // hard-coded to look at only the first such function.
Stephen Hinesdb169182012-01-05 18:46:36 -080087 llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
88 if (SigNode != NULL && SigNode->getNumOperands() == 1) {
89 llvm::Value *SigVal = SigNode->getOperand(0);
90 if (SigVal->getValueID() == llvm::Value::MDStringVal) {
91 llvm::StringRef SigString =
92 static_cast<llvm::MDString*>(SigVal)->getString();
93 uint32_t Signature = 0;
94 if (SigString.getAsInteger(10, Signature)) {
95 ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
96 return 0;
97 }
98 return Signature;
99 }
100 }
101
102 return 0;
103 }
104
Stephen Hines2b040862012-07-27 20:18:08 -0700105 // Get the actual value we should use to step through an allocation.
Stephen Hinesb730e232013-01-09 15:31:36 -0800106 // DL - Target Data size/layout information.
Stephen Hines2b040862012-07-27 20:18:08 -0700107 // T - Type of allocation (should be a pointer).
108 // OrigStep - Original step increment (root.expand() input from driver).
Stephen Hinesb730e232013-01-09 15:31:36 -0800109 llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *T,
Stephen Hines2b040862012-07-27 20:18:08 -0700110 llvm::Value *OrigStep) {
Stephen Hinesb730e232013-01-09 15:31:36 -0800111 bccAssert(DL);
Stephen Hines2b040862012-07-27 20:18:08 -0700112 bccAssert(T);
113 bccAssert(OrigStep);
114 llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T);
115 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
116 if (mEnableStepOpt && T != VoidPtrTy && PT) {
117 llvm::Type *ET = PT->getElementType();
Stephen Hinesb730e232013-01-09 15:31:36 -0800118 uint64_t ETSize = DL->getTypeAllocSize(ET);
Stephen Hines2b040862012-07-27 20:18:08 -0700119 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
120 return llvm::ConstantInt::get(Int32Ty, ETSize);
121 } else {
122 return OrigStep;
123 }
124 }
125
// True when bit 0x01 of the signature is set.
static bool hasIn(uint32_t Signature) {
  const uint32_t InBit = 0x01;
  return (Signature & InBit) != 0;
}
129
// True when bit 0x02 of the signature is set.
static bool hasOut(uint32_t Signature) {
  const uint32_t OutBit = 0x02;
  return (Signature & OutBit) != 0;
}
133
// True when bit 0x04 of the signature is set.
static bool hasUsrData(uint32_t Signature) {
  const uint32_t UsrDataBit = 0x04;
  return (Signature & UsrDataBit) != 0;
}
137
// True when bit 0x08 of the signature is set.
static bool hasX(uint32_t Signature) {
  const uint32_t XBit = 0x08;
  return (Signature & XBit) != 0;
}
141
// True when bit 0x10 of the signature is set.
static bool hasY(uint32_t Signature) {
  const uint32_t YBit = 0x10;
  return (Signature & YBit) != 0;
}
145
// True when bit 0x20 of the signature is set.
static bool isKernel(uint32_t Signature) {
  const uint32_t KernelBit = 0x20;
  return (Signature & KernelBit) != 0;
}
149
Tobias Grosser8ae46072013-06-20 14:00:31 -0700150 /// @brief Returns the type of the ForEach stub parameter structure.
151 ///
152 /// Renderscript uses a single structure in which all parameters are passed
153 /// to keep the signature of the expanded function independent of the
154 /// parameters passed to it.
155 llvm::Type *getForeachStubTy() {
Stephen Hinesdb169182012-01-05 18:46:36 -0800156 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
157 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
158 llvm::Type *SizeTy = Int32Ty;
Stephen Hinesdb169182012-01-05 18:46:36 -0800159 /* Defined in frameworks/base/libs/rs/rs_hal.h:
160 *
161 * struct RsForEachStubParamStruct {
162 * const void *in;
163 * void *out;
164 * const void *usr;
165 * size_t usr_len;
166 * uint32_t x;
167 * uint32_t y;
168 * uint32_t z;
169 * uint32_t lod;
170 * enum RsAllocationCubemapFace face;
171 * uint32_t ar[16];
172 * };
173 */
174 llvm::SmallVector<llvm::Type*, 9> StructTys;
175 StructTys.push_back(VoidPtrTy); // const void *in
176 StructTys.push_back(VoidPtrTy); // void *out
177 StructTys.push_back(VoidPtrTy); // const void *usr
178 StructTys.push_back(SizeTy); // size_t usr_len
179 StructTys.push_back(Int32Ty); // uint32_t x
180 StructTys.push_back(Int32Ty); // uint32_t y
181 StructTys.push_back(Int32Ty); // uint32_t z
182 StructTys.push_back(Int32Ty); // uint32_t lod
183 StructTys.push_back(Int32Ty); // enum RsAllocationCubemapFace
184 StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16]
185
Tobias Grosser8ae46072013-06-20 14:00:31 -0700186 return llvm::StructType::create(StructTys, "RsForEachStubParamStruct");
187 }
188
Tobias Grosser357b5862013-06-20 14:12:46 -0700189 /// @brief Create skeleton of the expanded function.
190 ///
191 /// This creates a function with the following signature:
192 ///
193 /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
194 /// uint32_t instep, uint32_t outstep)
195 ///
196 llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
197 llvm::Type *ForEachStubPtrTy = getForeachStubTy()->getPointerTo();
198 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
199
200 llvm::SmallVector<llvm::Type*, 8> ParamTys;
201 ParamTys.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
202 ParamTys.push_back(Int32Ty); // uint32_t x1
203 ParamTys.push_back(Int32Ty); // uint32_t x2
204 ParamTys.push_back(Int32Ty); // uint32_t instep
205 ParamTys.push_back(Int32Ty); // uint32_t outstep
206
207 llvm::FunctionType *FT =
208 llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
209 return llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage,
210 OldName + ".expand", M);
211 }
212
Tobias Grosser8ae46072013-06-20 14:00:31 -0700213public:
214 RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
215 bool pEnableStepOpt)
216 : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs),
217 mEnableStepOpt(pEnableStepOpt) {
218 }
219
220 /* Performs the actual optimization on a selected function. On success, the
221 * Module will contain a new function of the name "<NAME>.expand" that
222 * invokes <NAME>() in a loop with the appropriate parameters.
223 */
224 bool ExpandFunction(llvm::Function *F, uint32_t Signature) {
225 ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str());
226
227 if (!Signature) {
228 Signature = getRootSignature(F);
229 if (!Signature) {
230 // We couldn't determine how to expand this function based on its
231 // function signature.
232 return false;
233 }
234 }
235
236 llvm::DataLayout DL(M);
237
238 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
Tobias Grosser357b5862013-06-20 14:12:46 -0700239 llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());
Stephen Hinesdb169182012-01-05 18:46:36 -0800240
241 // Create and name the actual arguments to this expanded function.
242 llvm::SmallVector<llvm::Argument*, 8> ArgVec;
243 for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
244 E = ExpandedFunc->arg_end();
245 B != E;
246 ++B) {
247 ArgVec.push_back(B);
248 }
249
250 if (ArgVec.size() != 5) {
Shih-wei Liao89e84902012-01-17 03:07:40 -0800251 ALOGE("Incorrect number of arguments to function: %zu",
252 ArgVec.size());
Stephen Hinesdb169182012-01-05 18:46:36 -0800253 return false;
254 }
255 llvm::Value *Arg_p = ArgVec[0];
256 llvm::Value *Arg_x1 = ArgVec[1];
257 llvm::Value *Arg_x2 = ArgVec[2];
258 llvm::Value *Arg_instep = ArgVec[3];
259 llvm::Value *Arg_outstep = ArgVec[4];
260
261 Arg_p->setName("p");
262 Arg_x1->setName("x1");
263 Arg_x2->setName("x2");
Stephen Hines2b040862012-07-27 20:18:08 -0700264 Arg_instep->setName("arg_instep");
265 Arg_outstep->setName("arg_outstep");
266
267 llvm::Value *InStep = NULL;
268 llvm::Value *OutStep = NULL;
Stephen Hinesdb169182012-01-05 18:46:36 -0800269
270 // Construct the actual function body.
271 llvm::BasicBlock *Begin =
272 llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
273 llvm::IRBuilder<> Builder(Begin);
274
275 // uint32_t X = x1;
276 llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
277 Builder.CreateStore(Arg_x1, AX);
278
Stephen Hinescc366e52012-02-21 17:22:04 -0800279 // Collect and construct the arguments for the kernel().
Stephen Hinesdb169182012-01-05 18:46:36 -0800280 // Note that we load any loop-invariant arguments before entering the Loop.
281 llvm::Function::arg_iterator Args = F->arg_begin();
282
283 llvm::Type *InTy = NULL;
284 llvm::AllocaInst *AIn = NULL;
285 if (hasIn(Signature)) {
286 InTy = Args->getType();
287 AIn = Builder.CreateAlloca(InTy, 0, "AIn");
Stephen Hinesb730e232013-01-09 15:31:36 -0800288 InStep = getStepValue(&DL, InTy, Arg_instep);
Stephen Hines2b040862012-07-27 20:18:08 -0700289 InStep->setName("instep");
Stephen Hinesdb169182012-01-05 18:46:36 -0800290 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
291 Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
292 Args++;
293 }
294
295 llvm::Type *OutTy = NULL;
296 llvm::AllocaInst *AOut = NULL;
297 if (hasOut(Signature)) {
298 OutTy = Args->getType();
299 AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
Stephen Hinesb730e232013-01-09 15:31:36 -0800300 OutStep = getStepValue(&DL, OutTy, Arg_outstep);
Stephen Hines2b040862012-07-27 20:18:08 -0700301 OutStep->setName("outstep");
Stephen Hinesdb169182012-01-05 18:46:36 -0800302 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
303 Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
304 Args++;
305 }
306
307 llvm::Value *UsrData = NULL;
308 if (hasUsrData(Signature)) {
309 llvm::Type *UsrDataTy = Args->getType();
310 UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
311 Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
312 UsrData->setName("UsrData");
313 Args++;
314 }
315
316 if (hasX(Signature)) {
317 Args++;
318 }
319
320 llvm::Value *Y = NULL;
321 if (hasY(Signature)) {
322 Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
323 Args++;
324 }
325
Stephen Hines6e9e89d2012-07-27 19:16:04 -0700326 bccAssert(Args == F->arg_end());
Stephen Hinesdb169182012-01-05 18:46:36 -0800327
328 llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
329 llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
330
331 // if (x1 < x2) goto Loop; else goto Exit;
332 llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
333 Builder.CreateCondBr(Cond, Loop, Exit);
334
335 // Loop:
336 Builder.SetInsertPoint(Loop);
337
Stephen Hinescc366e52012-02-21 17:22:04 -0800338 // Populate the actual call to kernel().
Stephen Hinesdb169182012-01-05 18:46:36 -0800339 llvm::SmallVector<llvm::Value*, 8> RootArgs;
340
Stephen Hines7ae3a822012-09-14 19:24:58 -0700341 llvm::Value *InPtr = NULL;
342 llvm::Value *OutPtr = NULL;
Stephen Hinesdb169182012-01-05 18:46:36 -0800343
344 if (AIn) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700345 InPtr = Builder.CreateLoad(AIn, "InPtr");
346 RootArgs.push_back(InPtr);
Stephen Hinesdb169182012-01-05 18:46:36 -0800347 }
348
349 if (AOut) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700350 OutPtr = Builder.CreateLoad(AOut, "OutPtr");
351 RootArgs.push_back(OutPtr);
Stephen Hinesdb169182012-01-05 18:46:36 -0800352 }
353
354 if (UsrData) {
355 RootArgs.push_back(UsrData);
356 }
357
358 // We always have to load X, since it is used to iterate through the loop.
359 llvm::Value *X = Builder.CreateLoad(AX, "X");
360 if (hasX(Signature)) {
361 RootArgs.push_back(X);
362 }
363
364 if (Y) {
365 RootArgs.push_back(Y);
366 }
367
368 Builder.CreateCall(F, RootArgs);
369
Stephen Hines7ae3a822012-09-14 19:24:58 -0700370 if (InPtr) {
371 // InPtr += instep
Stephen Hinesdb169182012-01-05 18:46:36 -0800372 llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
Stephen Hines7ae3a822012-09-14 19:24:58 -0700373 Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy);
Stephen Hinesdb169182012-01-05 18:46:36 -0800374 Builder.CreateStore(NewIn, AIn);
375 }
376
Stephen Hines7ae3a822012-09-14 19:24:58 -0700377 if (OutPtr) {
378 // OutPtr += outstep
Stephen Hinesdb169182012-01-05 18:46:36 -0800379 llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
Stephen Hines7ae3a822012-09-14 19:24:58 -0700380 Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy);
381 Builder.CreateStore(NewOut, AOut);
382 }
383
384 // X++;
385 llvm::Value *XPlusOne =
386 Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
387 Builder.CreateStore(XPlusOne, AX);
388
389 // If (X < x2) goto Loop; else goto Exit;
390 Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
391 Builder.CreateCondBr(Cond, Loop, Exit);
392
393 // Exit:
394 Builder.SetInsertPoint(Exit);
395 Builder.CreateRetVoid();
396
397 return true;
398 }
399
400 /* Expand a pass-by-value kernel.
401 */
402 bool ExpandKernel(llvm::Function *F, uint32_t Signature) {
403 bccAssert(isKernel(Signature));
404 ALOGV("Expanding kernel Function %s", F->getName().str().c_str());
405
406 // TODO: Refactor this to share functionality with ExpandFunction.
Stephen Hinesb730e232013-01-09 15:31:36 -0800407 llvm::DataLayout DL(M);
Stephen Hines7ae3a822012-09-14 19:24:58 -0700408
Stephen Hines7ae3a822012-09-14 19:24:58 -0700409 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
Tobias Grosser357b5862013-06-20 14:12:46 -0700410 llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());
Stephen Hines7ae3a822012-09-14 19:24:58 -0700411
412 // Create and name the actual arguments to this expanded function.
413 llvm::SmallVector<llvm::Argument*, 8> ArgVec;
414 for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
415 E = ExpandedFunc->arg_end();
416 B != E;
417 ++B) {
418 ArgVec.push_back(B);
419 }
420
421 if (ArgVec.size() != 5) {
422 ALOGE("Incorrect number of arguments to function: %zu",
423 ArgVec.size());
424 return false;
425 }
426 llvm::Value *Arg_p = ArgVec[0];
427 llvm::Value *Arg_x1 = ArgVec[1];
428 llvm::Value *Arg_x2 = ArgVec[2];
429 llvm::Value *Arg_instep = ArgVec[3];
430 llvm::Value *Arg_outstep = ArgVec[4];
431
432 Arg_p->setName("p");
433 Arg_x1->setName("x1");
434 Arg_x2->setName("x2");
435 Arg_instep->setName("arg_instep");
436 Arg_outstep->setName("arg_outstep");
437
438 llvm::Value *InStep = NULL;
439 llvm::Value *OutStep = NULL;
440
441 // Construct the actual function body.
442 llvm::BasicBlock *Begin =
443 llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
444 llvm::IRBuilder<> Builder(Begin);
445
446 // uint32_t X = x1;
447 llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
448 Builder.CreateStore(Arg_x1, AX);
449
450 // Collect and construct the arguments for the kernel().
451 // Note that we load any loop-invariant arguments before entering the Loop.
452 llvm::Function::arg_iterator Args = F->arg_begin();
453
Stephen Hines74a4b082012-09-21 19:26:48 -0700454 llvm::Type *OutTy = NULL;
455 llvm::AllocaInst *AOut = NULL;
456 bool PassOutByReference = false;
457 if (hasOut(Signature)) {
458 llvm::Type *OutBaseTy = F->getReturnType();
459 if (OutBaseTy->isVoidTy()) {
460 PassOutByReference = true;
461 OutTy = Args->getType();
462 Args++;
463 } else {
464 OutTy = OutBaseTy->getPointerTo();
465 // We don't increment Args, since we are using the actual return type.
466 }
467 AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
Stephen Hinesb730e232013-01-09 15:31:36 -0800468 OutStep = getStepValue(&DL, OutTy, Arg_outstep);
Stephen Hines74a4b082012-09-21 19:26:48 -0700469 OutStep->setName("outstep");
470 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
471 Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
472 }
473
Stephen Hines7ae3a822012-09-14 19:24:58 -0700474 llvm::Type *InBaseTy = NULL;
475 llvm::Type *InTy = NULL;
476 llvm::AllocaInst *AIn = NULL;
477 if (hasIn(Signature)) {
478 InBaseTy = Args->getType();
479 InTy =InBaseTy->getPointerTo();
480 AIn = Builder.CreateAlloca(InTy, 0, "AIn");
Stephen Hinesb730e232013-01-09 15:31:36 -0800481 InStep = getStepValue(&DL, InTy, Arg_instep);
Stephen Hines7ae3a822012-09-14 19:24:58 -0700482 InStep->setName("instep");
483 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
484 Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
485 Args++;
486 }
487
Stephen Hines7ae3a822012-09-14 19:24:58 -0700488 // No usrData parameter on kernels.
489 bccAssert(!hasUsrData(Signature));
490
491 if (hasX(Signature)) {
492 Args++;
493 }
494
495 llvm::Value *Y = NULL;
496 if (hasY(Signature)) {
497 Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
498 Args++;
499 }
500
501 bccAssert(Args == F->arg_end());
502
503 llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
504 llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
505
506 // if (x1 < x2) goto Loop; else goto Exit;
507 llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
508 Builder.CreateCondBr(Cond, Loop, Exit);
509
510 // Loop:
511 Builder.SetInsertPoint(Loop);
512
513 // Populate the actual call to kernel().
514 llvm::SmallVector<llvm::Value*, 8> RootArgs;
515
516 llvm::Value *InPtr = NULL;
517 llvm::Value *In = NULL;
518 llvm::Value *OutPtr = NULL;
519
Stephen Hines74a4b082012-09-21 19:26:48 -0700520 if (PassOutByReference) {
521 OutPtr = Builder.CreateLoad(AOut, "OutPtr");
522 RootArgs.push_back(OutPtr);
523 }
524
Stephen Hines7ae3a822012-09-14 19:24:58 -0700525 if (AIn) {
526 InPtr = Builder.CreateLoad(AIn, "InPtr");
527 In = Builder.CreateLoad(InPtr, "In");
528 RootArgs.push_back(In);
529 }
530
531 // We always have to load X, since it is used to iterate through the loop.
532 llvm::Value *X = Builder.CreateLoad(AX, "X");
533 if (hasX(Signature)) {
534 RootArgs.push_back(X);
535 }
536
537 if (Y) {
538 RootArgs.push_back(Y);
539 }
540
541 llvm::Value *RetVal = Builder.CreateCall(F, RootArgs);
542
Stephen Hines74a4b082012-09-21 19:26:48 -0700543 if (AOut && !PassOutByReference) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700544 OutPtr = Builder.CreateLoad(AOut, "OutPtr");
545 Builder.CreateStore(RetVal, OutPtr);
546 }
547
548 if (InPtr) {
549 // InPtr += instep
550 llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
551 Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy);
552 Builder.CreateStore(NewIn, AIn);
553 }
554
555 if (OutPtr) {
556 // OutPtr += outstep
557 llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
558 Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy);
Stephen Hinesdb169182012-01-05 18:46:36 -0800559 Builder.CreateStore(NewOut, AOut);
560 }
561
562 // X++;
563 llvm::Value *XPlusOne =
564 Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
565 Builder.CreateStore(XPlusOne, AX);
566
567 // If (X < x2) goto Loop; else goto Exit;
568 Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
569 Builder.CreateCondBr(Cond, Loop, Exit);
570
571 // Exit:
572 Builder.SetInsertPoint(Exit);
573 Builder.CreateRetVoid();
574
575 return true;
576 }
577
578 virtual bool runOnModule(llvm::Module &M) {
Stephen Hinescc366e52012-02-21 17:22:04 -0800579 bool Changed = false;
Stephen Hinesdb169182012-01-05 18:46:36 -0800580 this->M = &M;
581 C = &M.getContext();
582
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700583 for (RSInfo::ExportForeachFuncListTy::const_iterator
584 func_iter = mFuncs.begin(), func_end = mFuncs.end();
585 func_iter != func_end; func_iter++) {
586 const char *name = func_iter->first;
587 uint32_t signature = func_iter->second;
588 llvm::Function *kernel = M.getFunction(name);
Stephen Hines7ae3a822012-09-14 19:24:58 -0700589 if (kernel && isKernel(signature)) {
590 Changed |= ExpandKernel(kernel, signature);
591 }
592 else if (kernel && kernel->getReturnType()->isVoidTy()) {
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700593 Changed |= ExpandFunction(kernel, signature);
Stephen Hinescc366e52012-02-21 17:22:04 -0800594 }
Stephen Hinesdb169182012-01-05 18:46:36 -0800595 }
596
Stephen Hinescc366e52012-02-21 17:22:04 -0800597 return Changed;
Stephen Hinesdb169182012-01-05 18:46:36 -0800598 }
599
600 virtual const char *getPassName() const {
601 return "ForEach-able Function Expansion";
602 }
603
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700604}; // end RSForEachExpandPass
Stephen Hinesdb169182012-01-05 18:46:36 -0800605
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700606} // end anonymous namespace
607
// Storage for the static pass ID that the constructor hands to the
// llvm::ModulePass base class.
char RSForEachExpandPass::ID = 0;
Stephen Hinesdb169182012-01-05 18:46:36 -0800609
610namespace bcc {
611
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700612llvm::ModulePass *
Stephen Hines2b040862012-07-27 20:18:08 -0700613createRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
614 bool pEnableStepOpt){
615 return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt);
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700616}
Stephen Hinesdb169182012-01-05 18:46:36 -0800617
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700618} // end namespace bcc