blob: dec8bf968cd072227237ff30edf7578f738bf121 [file] [log] [blame]
Stephen Hinesdb169182012-01-05 18:46:36 -08001/*
2 * Copyright 2012, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Stephen Hines6e9e89d2012-07-27 19:16:04 -070017#include "bcc/Assert.h"
Stephen Hinese198abe2012-07-27 18:05:41 -070018#include "bcc/Renderscript/RSTransforms.h"
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070019
20#include <cstdlib>
21
Stephen Hinesb730e232013-01-09 15:31:36 -080022#include <llvm/IR/DerivedTypes.h>
23#include <llvm/IR/Function.h>
24#include <llvm/IR/Instructions.h>
25#include <llvm/IR/IRBuilder.h>
26#include <llvm/IR/Module.h>
Zonr Changc72c4dd2012-04-12 15:38:53 +080027#include <llvm/Pass.h>
Stephen Hines7ae3a822012-09-14 19:24:58 -070028#include <llvm/Support/raw_ostream.h>
Stephen Hinesb730e232013-01-09 15:31:36 -080029#include <llvm/IR/DataLayout.h>
30#include <llvm/IR/Type.h>
Stephen Hinesdb169182012-01-05 18:46:36 -080031
Zonr Changc72c4dd2012-04-12 15:38:53 +080032#include "bcc/Config/Config.h"
Stephen Hinese198abe2012-07-27 18:05:41 -070033#include "bcc/Renderscript/RSInfo.h"
Zonr Changef73a242012-04-12 16:44:01 +080034#include "bcc/Support/Log.h"
Stephen Hinesdb169182012-01-05 18:46:36 -080035
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070036using namespace bcc;
37
Stephen Hinesdb169182012-01-05 18:46:36 -080038namespace {
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070039
40/* RSForEachExpandPass - This pass operates on functions that are able to be
41 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
42 * ForEach-able function to be invoked over the appropriate data cells of the
43 * input/output allocations (adjusting other relevant parameters as we go). We
44 * support doing this for any ForEach-able compute kernels. The new function
45 * name is the original function name followed by ".expand". Note that we
46 * still generate code for the original function.
47 */
48class RSForEachExpandPass : public llvm::ModulePass {
49private:
Stephen Hinesdb169182012-01-05 18:46:36 -080050 static char ID;
51
52 llvm::Module *M;
53 llvm::LLVMContext *C;
54
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070055 const RSInfo::ExportForeachFuncListTy &mFuncs;
Stephen Hinescc366e52012-02-21 17:22:04 -080056
Stephen Hines2b040862012-07-27 20:18:08 -070057 // Turns on optimization of allocation stride values.
58 bool mEnableStepOpt;
59
Stephen Hinescc366e52012-02-21 17:22:04 -080060 uint32_t getRootSignature(llvm::Function *F) {
Stephen Hinesdb169182012-01-05 18:46:36 -080061 const llvm::NamedMDNode *ExportForEachMetadata =
62 M->getNamedMetadata("#rs_export_foreach");
63
64 if (!ExportForEachMetadata) {
65 llvm::SmallVector<llvm::Type*, 8> RootArgTys;
66 for (llvm::Function::arg_iterator B = F->arg_begin(),
67 E = F->arg_end();
68 B != E;
69 ++B) {
70 RootArgTys.push_back(B->getType());
71 }
72
73 // For pre-ICS bitcode, we may not have signature information. In that
74 // case, we use the size of the RootArgTys to select the number of
75 // arguments.
76 return (1 << RootArgTys.size()) - 1;
77 }
78
Stephen Hines7ae3a822012-09-14 19:24:58 -070079 if (ExportForEachMetadata->getNumOperands() == 0) {
80 return 0;
81 }
82
Stephen Hines6e9e89d2012-07-27 19:16:04 -070083 bccAssert(ExportForEachMetadata->getNumOperands() > 0);
Stephen Hinesdb169182012-01-05 18:46:36 -080084
Stephen Hinescc366e52012-02-21 17:22:04 -080085 // We only handle the case for legacy root() functions here, so this is
86 // hard-coded to look at only the first such function.
Stephen Hinesdb169182012-01-05 18:46:36 -080087 llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
88 if (SigNode != NULL && SigNode->getNumOperands() == 1) {
89 llvm::Value *SigVal = SigNode->getOperand(0);
90 if (SigVal->getValueID() == llvm::Value::MDStringVal) {
91 llvm::StringRef SigString =
92 static_cast<llvm::MDString*>(SigVal)->getString();
93 uint32_t Signature = 0;
94 if (SigString.getAsInteger(10, Signature)) {
95 ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
96 return 0;
97 }
98 return Signature;
99 }
100 }
101
102 return 0;
103 }
104
Stephen Hines2b040862012-07-27 20:18:08 -0700105 // Get the actual value we should use to step through an allocation.
Stephen Hinesb730e232013-01-09 15:31:36 -0800106 // DL - Target Data size/layout information.
Stephen Hines2b040862012-07-27 20:18:08 -0700107 // T - Type of allocation (should be a pointer).
108 // OrigStep - Original step increment (root.expand() input from driver).
Stephen Hinesb730e232013-01-09 15:31:36 -0800109 llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *T,
Stephen Hines2b040862012-07-27 20:18:08 -0700110 llvm::Value *OrigStep) {
Stephen Hinesb730e232013-01-09 15:31:36 -0800111 bccAssert(DL);
Stephen Hines2b040862012-07-27 20:18:08 -0700112 bccAssert(T);
113 bccAssert(OrigStep);
114 llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T);
115 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
116 if (mEnableStepOpt && T != VoidPtrTy && PT) {
117 llvm::Type *ET = PT->getElementType();
Stephen Hinesb730e232013-01-09 15:31:36 -0800118 uint64_t ETSize = DL->getTypeAllocSize(ET);
Stephen Hines2b040862012-07-27 20:18:08 -0700119 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
120 return llvm::ConstantInt::get(Int32Ty, ETSize);
121 } else {
122 return OrigStep;
123 }
124 }
125
// Returns true when bit 0x01 of the ForEach signature is set, i.e. the
// function takes an input allocation argument.
static bool hasIn(uint32_t Signature) {
  return (Signature & 0x01) != 0;
}
129
// Returns true when bit 0x02 of the ForEach signature is set, i.e. the
// function produces an output allocation value.
static bool hasOut(uint32_t Signature) {
  return (Signature & 0x02) != 0;
}
133
// Returns true when bit 0x04 of the ForEach signature is set, i.e. the
// function takes a user-data pointer argument.
static bool hasUsrData(uint32_t Signature) {
  return (Signature & 0x04) != 0;
}
137
// Returns true when bit 0x08 of the ForEach signature is set, i.e. the
// function takes the x coordinate as an argument.
static bool hasX(uint32_t Signature) {
  return (Signature & 0x08) != 0;
}
141
// Returns true when bit 0x10 of the ForEach signature is set, i.e. the
// function takes the y coordinate as an argument.
static bool hasY(uint32_t Signature) {
  return (Signature & 0x10) != 0;
}
145
// Returns true when bit 0x20 of the ForEach signature is set, which marks
// the function as a pass-by-value kernel (handled by ExpandKernel()).
static bool isKernel(uint32_t Signature) {
  return (Signature & 0x20) != 0;
}
149
Tobias Grosser8ae46072013-06-20 14:00:31 -0700150 /// @brief Returns the type of the ForEach stub parameter structure.
151 ///
152 /// Renderscript uses a single structure in which all parameters are passed
153 /// to keep the signature of the expanded function independent of the
154 /// parameters passed to it.
155 llvm::Type *getForeachStubTy() {
Stephen Hinesdb169182012-01-05 18:46:36 -0800156 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
157 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
158 llvm::Type *SizeTy = Int32Ty;
Stephen Hinesdb169182012-01-05 18:46:36 -0800159 /* Defined in frameworks/base/libs/rs/rs_hal.h:
160 *
161 * struct RsForEachStubParamStruct {
162 * const void *in;
163 * void *out;
164 * const void *usr;
165 * size_t usr_len;
166 * uint32_t x;
167 * uint32_t y;
168 * uint32_t z;
169 * uint32_t lod;
170 * enum RsAllocationCubemapFace face;
171 * uint32_t ar[16];
172 * };
173 */
174 llvm::SmallVector<llvm::Type*, 9> StructTys;
175 StructTys.push_back(VoidPtrTy); // const void *in
176 StructTys.push_back(VoidPtrTy); // void *out
177 StructTys.push_back(VoidPtrTy); // const void *usr
178 StructTys.push_back(SizeTy); // size_t usr_len
179 StructTys.push_back(Int32Ty); // uint32_t x
180 StructTys.push_back(Int32Ty); // uint32_t y
181 StructTys.push_back(Int32Ty); // uint32_t z
182 StructTys.push_back(Int32Ty); // uint32_t lod
183 StructTys.push_back(Int32Ty); // enum RsAllocationCubemapFace
184 StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16]
185
Tobias Grosser8ae46072013-06-20 14:00:31 -0700186 return llvm::StructType::create(StructTys, "RsForEachStubParamStruct");
187 }
188
189public:
190 RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
191 bool pEnableStepOpt)
192 : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs),
193 mEnableStepOpt(pEnableStepOpt) {
194 }
195
196 /* Performs the actual optimization on a selected function. On success, the
197 * Module will contain a new function of the name "<NAME>.expand" that
198 * invokes <NAME>() in a loop with the appropriate parameters.
199 */
200 bool ExpandFunction(llvm::Function *F, uint32_t Signature) {
201 ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str());
202
203 if (!Signature) {
204 Signature = getRootSignature(F);
205 if (!Signature) {
206 // We couldn't determine how to expand this function based on its
207 // function signature.
208 return false;
209 }
210 }
211
212 llvm::DataLayout DL(M);
213
214 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
215 llvm::Type *ForEachStubPtrTy = getForeachStubTy()->getPointerTo();
Stephen Hinesdb169182012-01-05 18:46:36 -0800216
217 /* Create the function signature for our expanded function.
218 * void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
219 * uint32_t instep, uint32_t outstep)
220 */
221 llvm::SmallVector<llvm::Type*, 8> ParamTys;
222 ParamTys.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
223 ParamTys.push_back(Int32Ty); // uint32_t x1
224 ParamTys.push_back(Int32Ty); // uint32_t x2
225 ParamTys.push_back(Int32Ty); // uint32_t instep
226 ParamTys.push_back(Int32Ty); // uint32_t outstep
227
228 llvm::FunctionType *FT =
229 llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
230 llvm::Function *ExpandedFunc =
231 llvm::Function::Create(FT,
232 llvm::GlobalValue::ExternalLinkage,
233 F->getName() + ".expand", M);
234
235 // Create and name the actual arguments to this expanded function.
236 llvm::SmallVector<llvm::Argument*, 8> ArgVec;
237 for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
238 E = ExpandedFunc->arg_end();
239 B != E;
240 ++B) {
241 ArgVec.push_back(B);
242 }
243
244 if (ArgVec.size() != 5) {
Shih-wei Liao89e84902012-01-17 03:07:40 -0800245 ALOGE("Incorrect number of arguments to function: %zu",
246 ArgVec.size());
Stephen Hinesdb169182012-01-05 18:46:36 -0800247 return false;
248 }
249 llvm::Value *Arg_p = ArgVec[0];
250 llvm::Value *Arg_x1 = ArgVec[1];
251 llvm::Value *Arg_x2 = ArgVec[2];
252 llvm::Value *Arg_instep = ArgVec[3];
253 llvm::Value *Arg_outstep = ArgVec[4];
254
255 Arg_p->setName("p");
256 Arg_x1->setName("x1");
257 Arg_x2->setName("x2");
Stephen Hines2b040862012-07-27 20:18:08 -0700258 Arg_instep->setName("arg_instep");
259 Arg_outstep->setName("arg_outstep");
260
261 llvm::Value *InStep = NULL;
262 llvm::Value *OutStep = NULL;
Stephen Hinesdb169182012-01-05 18:46:36 -0800263
264 // Construct the actual function body.
265 llvm::BasicBlock *Begin =
266 llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
267 llvm::IRBuilder<> Builder(Begin);
268
269 // uint32_t X = x1;
270 llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
271 Builder.CreateStore(Arg_x1, AX);
272
Stephen Hinescc366e52012-02-21 17:22:04 -0800273 // Collect and construct the arguments for the kernel().
Stephen Hinesdb169182012-01-05 18:46:36 -0800274 // Note that we load any loop-invariant arguments before entering the Loop.
275 llvm::Function::arg_iterator Args = F->arg_begin();
276
277 llvm::Type *InTy = NULL;
278 llvm::AllocaInst *AIn = NULL;
279 if (hasIn(Signature)) {
280 InTy = Args->getType();
281 AIn = Builder.CreateAlloca(InTy, 0, "AIn");
Stephen Hinesb730e232013-01-09 15:31:36 -0800282 InStep = getStepValue(&DL, InTy, Arg_instep);
Stephen Hines2b040862012-07-27 20:18:08 -0700283 InStep->setName("instep");
Stephen Hinesdb169182012-01-05 18:46:36 -0800284 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
285 Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
286 Args++;
287 }
288
289 llvm::Type *OutTy = NULL;
290 llvm::AllocaInst *AOut = NULL;
291 if (hasOut(Signature)) {
292 OutTy = Args->getType();
293 AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
Stephen Hinesb730e232013-01-09 15:31:36 -0800294 OutStep = getStepValue(&DL, OutTy, Arg_outstep);
Stephen Hines2b040862012-07-27 20:18:08 -0700295 OutStep->setName("outstep");
Stephen Hinesdb169182012-01-05 18:46:36 -0800296 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
297 Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
298 Args++;
299 }
300
301 llvm::Value *UsrData = NULL;
302 if (hasUsrData(Signature)) {
303 llvm::Type *UsrDataTy = Args->getType();
304 UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
305 Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
306 UsrData->setName("UsrData");
307 Args++;
308 }
309
310 if (hasX(Signature)) {
311 Args++;
312 }
313
314 llvm::Value *Y = NULL;
315 if (hasY(Signature)) {
316 Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
317 Args++;
318 }
319
Stephen Hines6e9e89d2012-07-27 19:16:04 -0700320 bccAssert(Args == F->arg_end());
Stephen Hinesdb169182012-01-05 18:46:36 -0800321
322 llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
323 llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
324
325 // if (x1 < x2) goto Loop; else goto Exit;
326 llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
327 Builder.CreateCondBr(Cond, Loop, Exit);
328
329 // Loop:
330 Builder.SetInsertPoint(Loop);
331
Stephen Hinescc366e52012-02-21 17:22:04 -0800332 // Populate the actual call to kernel().
Stephen Hinesdb169182012-01-05 18:46:36 -0800333 llvm::SmallVector<llvm::Value*, 8> RootArgs;
334
Stephen Hines7ae3a822012-09-14 19:24:58 -0700335 llvm::Value *InPtr = NULL;
336 llvm::Value *OutPtr = NULL;
Stephen Hinesdb169182012-01-05 18:46:36 -0800337
338 if (AIn) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700339 InPtr = Builder.CreateLoad(AIn, "InPtr");
340 RootArgs.push_back(InPtr);
Stephen Hinesdb169182012-01-05 18:46:36 -0800341 }
342
343 if (AOut) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700344 OutPtr = Builder.CreateLoad(AOut, "OutPtr");
345 RootArgs.push_back(OutPtr);
Stephen Hinesdb169182012-01-05 18:46:36 -0800346 }
347
348 if (UsrData) {
349 RootArgs.push_back(UsrData);
350 }
351
352 // We always have to load X, since it is used to iterate through the loop.
353 llvm::Value *X = Builder.CreateLoad(AX, "X");
354 if (hasX(Signature)) {
355 RootArgs.push_back(X);
356 }
357
358 if (Y) {
359 RootArgs.push_back(Y);
360 }
361
362 Builder.CreateCall(F, RootArgs);
363
Stephen Hines7ae3a822012-09-14 19:24:58 -0700364 if (InPtr) {
365 // InPtr += instep
Stephen Hinesdb169182012-01-05 18:46:36 -0800366 llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
Stephen Hines7ae3a822012-09-14 19:24:58 -0700367 Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy);
Stephen Hinesdb169182012-01-05 18:46:36 -0800368 Builder.CreateStore(NewIn, AIn);
369 }
370
Stephen Hines7ae3a822012-09-14 19:24:58 -0700371 if (OutPtr) {
372 // OutPtr += outstep
Stephen Hinesdb169182012-01-05 18:46:36 -0800373 llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
Stephen Hines7ae3a822012-09-14 19:24:58 -0700374 Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy);
375 Builder.CreateStore(NewOut, AOut);
376 }
377
378 // X++;
379 llvm::Value *XPlusOne =
380 Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
381 Builder.CreateStore(XPlusOne, AX);
382
383 // If (X < x2) goto Loop; else goto Exit;
384 Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
385 Builder.CreateCondBr(Cond, Loop, Exit);
386
387 // Exit:
388 Builder.SetInsertPoint(Exit);
389 Builder.CreateRetVoid();
390
391 return true;
392 }
393
394 /* Expand a pass-by-value kernel.
395 */
396 bool ExpandKernel(llvm::Function *F, uint32_t Signature) {
397 bccAssert(isKernel(Signature));
398 ALOGV("Expanding kernel Function %s", F->getName().str().c_str());
399
400 // TODO: Refactor this to share functionality with ExpandFunction.
Stephen Hinesb730e232013-01-09 15:31:36 -0800401 llvm::DataLayout DL(M);
Stephen Hines7ae3a822012-09-14 19:24:58 -0700402
Stephen Hines7ae3a822012-09-14 19:24:58 -0700403 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
Tobias Grosser8ae46072013-06-20 14:00:31 -0700404 llvm::Type *ForEachStubPtrTy = getForeachStubTy()->getPointerTo();
Stephen Hines7ae3a822012-09-14 19:24:58 -0700405
406 /* Create the function signature for our expanded function.
407 * void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
408 * uint32_t instep, uint32_t outstep)
409 */
410 llvm::SmallVector<llvm::Type*, 8> ParamTys;
411 ParamTys.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
412 ParamTys.push_back(Int32Ty); // uint32_t x1
413 ParamTys.push_back(Int32Ty); // uint32_t x2
414 ParamTys.push_back(Int32Ty); // uint32_t instep
415 ParamTys.push_back(Int32Ty); // uint32_t outstep
416
417 llvm::FunctionType *FT =
418 llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
419 llvm::Function *ExpandedFunc =
420 llvm::Function::Create(FT,
421 llvm::GlobalValue::ExternalLinkage,
422 F->getName() + ".expand", M);
423
424 // Create and name the actual arguments to this expanded function.
425 llvm::SmallVector<llvm::Argument*, 8> ArgVec;
426 for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
427 E = ExpandedFunc->arg_end();
428 B != E;
429 ++B) {
430 ArgVec.push_back(B);
431 }
432
433 if (ArgVec.size() != 5) {
434 ALOGE("Incorrect number of arguments to function: %zu",
435 ArgVec.size());
436 return false;
437 }
438 llvm::Value *Arg_p = ArgVec[0];
439 llvm::Value *Arg_x1 = ArgVec[1];
440 llvm::Value *Arg_x2 = ArgVec[2];
441 llvm::Value *Arg_instep = ArgVec[3];
442 llvm::Value *Arg_outstep = ArgVec[4];
443
444 Arg_p->setName("p");
445 Arg_x1->setName("x1");
446 Arg_x2->setName("x2");
447 Arg_instep->setName("arg_instep");
448 Arg_outstep->setName("arg_outstep");
449
450 llvm::Value *InStep = NULL;
451 llvm::Value *OutStep = NULL;
452
453 // Construct the actual function body.
454 llvm::BasicBlock *Begin =
455 llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
456 llvm::IRBuilder<> Builder(Begin);
457
458 // uint32_t X = x1;
459 llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
460 Builder.CreateStore(Arg_x1, AX);
461
462 // Collect and construct the arguments for the kernel().
463 // Note that we load any loop-invariant arguments before entering the Loop.
464 llvm::Function::arg_iterator Args = F->arg_begin();
465
Stephen Hines74a4b082012-09-21 19:26:48 -0700466 llvm::Type *OutTy = NULL;
467 llvm::AllocaInst *AOut = NULL;
468 bool PassOutByReference = false;
469 if (hasOut(Signature)) {
470 llvm::Type *OutBaseTy = F->getReturnType();
471 if (OutBaseTy->isVoidTy()) {
472 PassOutByReference = true;
473 OutTy = Args->getType();
474 Args++;
475 } else {
476 OutTy = OutBaseTy->getPointerTo();
477 // We don't increment Args, since we are using the actual return type.
478 }
479 AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
Stephen Hinesb730e232013-01-09 15:31:36 -0800480 OutStep = getStepValue(&DL, OutTy, Arg_outstep);
Stephen Hines74a4b082012-09-21 19:26:48 -0700481 OutStep->setName("outstep");
482 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
483 Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
484 }
485
Stephen Hines7ae3a822012-09-14 19:24:58 -0700486 llvm::Type *InBaseTy = NULL;
487 llvm::Type *InTy = NULL;
488 llvm::AllocaInst *AIn = NULL;
489 if (hasIn(Signature)) {
490 InBaseTy = Args->getType();
491 InTy =InBaseTy->getPointerTo();
492 AIn = Builder.CreateAlloca(InTy, 0, "AIn");
Stephen Hinesb730e232013-01-09 15:31:36 -0800493 InStep = getStepValue(&DL, InTy, Arg_instep);
Stephen Hines7ae3a822012-09-14 19:24:58 -0700494 InStep->setName("instep");
495 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
496 Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
497 Args++;
498 }
499
Stephen Hines7ae3a822012-09-14 19:24:58 -0700500 // No usrData parameter on kernels.
501 bccAssert(!hasUsrData(Signature));
502
503 if (hasX(Signature)) {
504 Args++;
505 }
506
507 llvm::Value *Y = NULL;
508 if (hasY(Signature)) {
509 Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
510 Args++;
511 }
512
513 bccAssert(Args == F->arg_end());
514
515 llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
516 llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
517
518 // if (x1 < x2) goto Loop; else goto Exit;
519 llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
520 Builder.CreateCondBr(Cond, Loop, Exit);
521
522 // Loop:
523 Builder.SetInsertPoint(Loop);
524
525 // Populate the actual call to kernel().
526 llvm::SmallVector<llvm::Value*, 8> RootArgs;
527
528 llvm::Value *InPtr = NULL;
529 llvm::Value *In = NULL;
530 llvm::Value *OutPtr = NULL;
531
Stephen Hines74a4b082012-09-21 19:26:48 -0700532 if (PassOutByReference) {
533 OutPtr = Builder.CreateLoad(AOut, "OutPtr");
534 RootArgs.push_back(OutPtr);
535 }
536
Stephen Hines7ae3a822012-09-14 19:24:58 -0700537 if (AIn) {
538 InPtr = Builder.CreateLoad(AIn, "InPtr");
539 In = Builder.CreateLoad(InPtr, "In");
540 RootArgs.push_back(In);
541 }
542
543 // We always have to load X, since it is used to iterate through the loop.
544 llvm::Value *X = Builder.CreateLoad(AX, "X");
545 if (hasX(Signature)) {
546 RootArgs.push_back(X);
547 }
548
549 if (Y) {
550 RootArgs.push_back(Y);
551 }
552
553 llvm::Value *RetVal = Builder.CreateCall(F, RootArgs);
554
Stephen Hines74a4b082012-09-21 19:26:48 -0700555 if (AOut && !PassOutByReference) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700556 OutPtr = Builder.CreateLoad(AOut, "OutPtr");
557 Builder.CreateStore(RetVal, OutPtr);
558 }
559
560 if (InPtr) {
561 // InPtr += instep
562 llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
563 Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy);
564 Builder.CreateStore(NewIn, AIn);
565 }
566
567 if (OutPtr) {
568 // OutPtr += outstep
569 llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
570 Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy);
Stephen Hinesdb169182012-01-05 18:46:36 -0800571 Builder.CreateStore(NewOut, AOut);
572 }
573
574 // X++;
575 llvm::Value *XPlusOne =
576 Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
577 Builder.CreateStore(XPlusOne, AX);
578
579 // If (X < x2) goto Loop; else goto Exit;
580 Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
581 Builder.CreateCondBr(Cond, Loop, Exit);
582
583 // Exit:
584 Builder.SetInsertPoint(Exit);
585 Builder.CreateRetVoid();
586
587 return true;
588 }
589
590 virtual bool runOnModule(llvm::Module &M) {
Stephen Hinescc366e52012-02-21 17:22:04 -0800591 bool Changed = false;
Stephen Hinesdb169182012-01-05 18:46:36 -0800592 this->M = &M;
593 C = &M.getContext();
594
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700595 for (RSInfo::ExportForeachFuncListTy::const_iterator
596 func_iter = mFuncs.begin(), func_end = mFuncs.end();
597 func_iter != func_end; func_iter++) {
598 const char *name = func_iter->first;
599 uint32_t signature = func_iter->second;
600 llvm::Function *kernel = M.getFunction(name);
Stephen Hines7ae3a822012-09-14 19:24:58 -0700601 if (kernel && isKernel(signature)) {
602 Changed |= ExpandKernel(kernel, signature);
603 }
604 else if (kernel && kernel->getReturnType()->isVoidTy()) {
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700605 Changed |= ExpandFunction(kernel, signature);
Stephen Hinescc366e52012-02-21 17:22:04 -0800606 }
Stephen Hinesdb169182012-01-05 18:46:36 -0800607 }
608
Stephen Hinescc366e52012-02-21 17:22:04 -0800609 return Changed;
Stephen Hinesdb169182012-01-05 18:46:36 -0800610 }
611
612 virtual const char *getPassName() const {
613 return "ForEach-able Function Expansion";
614 }
615
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700616}; // end RSForEachExpandPass
Stephen Hinesdb169182012-01-05 18:46:36 -0800617
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700618} // end anonymous namespace
619
620char RSForEachExpandPass::ID = 0;
Stephen Hinesdb169182012-01-05 18:46:36 -0800621
622namespace bcc {
623
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700624llvm::ModulePass *
Stephen Hines2b040862012-07-27 20:18:08 -0700625createRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
626 bool pEnableStepOpt){
627 return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt);
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700628}
Stephen Hinesdb169182012-01-05 18:46:36 -0800629
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700630} // end namespace bcc