blob: ead1d1e1137c27d7b3cf7c57acca8d4c659dfd79 [file] [log] [blame]
Stephen Hinesdb169182012-01-05 18:46:36 -08001/*
2 * Copyright 2012, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Stephen Hines6e9e89d2012-07-27 19:16:04 -070017#include "bcc/Assert.h"
Stephen Hinese198abe2012-07-27 18:05:41 -070018#include "bcc/Renderscript/RSTransforms.h"
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070019
20#include <cstdlib>
21
Zonr Changc72c4dd2012-04-12 15:38:53 +080022#include <llvm/DerivedTypes.h>
23#include <llvm/Function.h>
24#include <llvm/Instructions.h>
Shih-wei Liao8b5be862012-08-02 22:45:18 -070025#include <llvm/IRBuilder.h>
Zonr Changc72c4dd2012-04-12 15:38:53 +080026#include <llvm/Module.h>
27#include <llvm/Pass.h>
Stephen Hines7ae3a822012-09-14 19:24:58 -070028#include <llvm/Support/raw_ostream.h>
Stephen Hines2b040862012-07-27 20:18:08 -070029#include <llvm/Target/TargetData.h>
Zonr Changc72c4dd2012-04-12 15:38:53 +080030#include <llvm/Type.h>
Stephen Hinesdb169182012-01-05 18:46:36 -080031
Zonr Changc72c4dd2012-04-12 15:38:53 +080032#include "bcc/Config/Config.h"
Stephen Hinese198abe2012-07-27 18:05:41 -070033#include "bcc/Renderscript/RSInfo.h"
Zonr Changef73a242012-04-12 16:44:01 +080034#include "bcc/Support/Log.h"
Stephen Hinesdb169182012-01-05 18:46:36 -080035
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070036using namespace bcc;
37
Stephen Hinesdb169182012-01-05 18:46:36 -080038namespace {
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070039
40/* RSForEachExpandPass - This pass operates on functions that are able to be
41 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
42 * ForEach-able function to be invoked over the appropriate data cells of the
43 * input/output allocations (adjusting other relevant parameters as we go). We
44 * support doing this for any ForEach-able compute kernels. The new function
45 * name is the original function name followed by ".expand". Note that we
46 * still generate code for the original function.
47 */
48class RSForEachExpandPass : public llvm::ModulePass {
49private:
Stephen Hinesdb169182012-01-05 18:46:36 -080050 static char ID;
51
52 llvm::Module *M;
53 llvm::LLVMContext *C;
54
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070055 const RSInfo::ExportForeachFuncListTy &mFuncs;
Stephen Hinescc366e52012-02-21 17:22:04 -080056
Stephen Hines2b040862012-07-27 20:18:08 -070057 // Turns on optimization of allocation stride values.
58 bool mEnableStepOpt;
59
Stephen Hinescc366e52012-02-21 17:22:04 -080060 uint32_t getRootSignature(llvm::Function *F) {
Stephen Hinesdb169182012-01-05 18:46:36 -080061 const llvm::NamedMDNode *ExportForEachMetadata =
62 M->getNamedMetadata("#rs_export_foreach");
63
64 if (!ExportForEachMetadata) {
65 llvm::SmallVector<llvm::Type*, 8> RootArgTys;
66 for (llvm::Function::arg_iterator B = F->arg_begin(),
67 E = F->arg_end();
68 B != E;
69 ++B) {
70 RootArgTys.push_back(B->getType());
71 }
72
73 // For pre-ICS bitcode, we may not have signature information. In that
74 // case, we use the size of the RootArgTys to select the number of
75 // arguments.
76 return (1 << RootArgTys.size()) - 1;
77 }
78
Stephen Hines7ae3a822012-09-14 19:24:58 -070079 if (ExportForEachMetadata->getNumOperands() == 0) {
80 return 0;
81 }
82
Stephen Hines6e9e89d2012-07-27 19:16:04 -070083 bccAssert(ExportForEachMetadata->getNumOperands() > 0);
Stephen Hinesdb169182012-01-05 18:46:36 -080084
Stephen Hinescc366e52012-02-21 17:22:04 -080085 // We only handle the case for legacy root() functions here, so this is
86 // hard-coded to look at only the first such function.
Stephen Hinesdb169182012-01-05 18:46:36 -080087 llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
88 if (SigNode != NULL && SigNode->getNumOperands() == 1) {
89 llvm::Value *SigVal = SigNode->getOperand(0);
90 if (SigVal->getValueID() == llvm::Value::MDStringVal) {
91 llvm::StringRef SigString =
92 static_cast<llvm::MDString*>(SigVal)->getString();
93 uint32_t Signature = 0;
94 if (SigString.getAsInteger(10, Signature)) {
95 ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
96 return 0;
97 }
98 return Signature;
99 }
100 }
101
102 return 0;
103 }
104
Stephen Hines2b040862012-07-27 20:18:08 -0700105 // Get the actual value we should use to step through an allocation.
106 // TD - Target Data size/layout information.
107 // T - Type of allocation (should be a pointer).
108 // OrigStep - Original step increment (root.expand() input from driver).
109 llvm::Value *getStepValue(llvm::TargetData *TD, llvm::Type *T,
110 llvm::Value *OrigStep) {
111 bccAssert(TD);
112 bccAssert(T);
113 bccAssert(OrigStep);
114 llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T);
115 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
116 if (mEnableStepOpt && T != VoidPtrTy && PT) {
117 llvm::Type *ET = PT->getElementType();
118 uint64_t ETSize = TD->getTypeStoreSize(ET);
119 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
120 return llvm::ConstantInt::get(Int32Ty, ETSize);
121 } else {
122 return OrigStep;
123 }
124 }
125
// True when bit 0x01 of the signature is set (an input parameter is present).
static bool hasIn(uint32_t Signature) {
  return (Signature & 0x01) != 0;
}
129
// True when bit 0x02 of the signature is set (an output is present).
static bool hasOut(uint32_t Signature) {
  return (Signature & 0x02) != 0;
}
133
// True when bit 0x04 of the signature is set (a usrData parameter is present).
static bool hasUsrData(uint32_t Signature) {
  return (Signature & 0x04) != 0;
}
137
// True when bit 0x08 of the signature is set (an x coordinate parameter is
// present).
static bool hasX(uint32_t Signature) {
  return (Signature & 0x08) != 0;
}
141
// True when bit 0x10 of the signature is set (a y coordinate parameter is
// present).
static bool hasY(uint32_t Signature) {
  return (Signature & 0x10) != 0;
}
145
// True when bit 0x20 of the signature is set (the function is a pass-by-value
// kernel rather than a pointer-style root function).
static bool isKernel(uint32_t Signature) {
  return (Signature & 0x20) != 0;
}
149
150
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700151public:
Stephen Hines2b040862012-07-27 20:18:08 -0700152 RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
153 bool pEnableStepOpt)
154 : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs),
155 mEnableStepOpt(pEnableStepOpt) {
Stephen Hinesdb169182012-01-05 18:46:36 -0800156 }
157
158 /* Performs the actual optimization on a selected function. On success, the
159 * Module will contain a new function of the name "<NAME>.expand" that
160 * invokes <NAME>() in a loop with the appropriate parameters.
161 */
Stephen Hinescc366e52012-02-21 17:22:04 -0800162 bool ExpandFunction(llvm::Function *F, uint32_t Signature) {
163 ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str());
Stephen Hinesdb169182012-01-05 18:46:36 -0800164
Stephen Hinesdb169182012-01-05 18:46:36 -0800165 if (!Signature) {
Stephen Hinescc366e52012-02-21 17:22:04 -0800166 Signature = getRootSignature(F);
167 if (!Signature) {
168 // We couldn't determine how to expand this function based on its
169 // function signature.
170 return false;
171 }
Stephen Hinesdb169182012-01-05 18:46:36 -0800172 }
173
Stephen Hines2b040862012-07-27 20:18:08 -0700174 llvm::TargetData TD(M);
175
Stephen Hinesdb169182012-01-05 18:46:36 -0800176 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
177 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
178 llvm::Type *SizeTy = Int32Ty;
179
180 /* Defined in frameworks/base/libs/rs/rs_hal.h:
181 *
182 * struct RsForEachStubParamStruct {
183 * const void *in;
184 * void *out;
185 * const void *usr;
186 * size_t usr_len;
187 * uint32_t x;
188 * uint32_t y;
189 * uint32_t z;
190 * uint32_t lod;
191 * enum RsAllocationCubemapFace face;
192 * uint32_t ar[16];
193 * };
194 */
195 llvm::SmallVector<llvm::Type*, 9> StructTys;
196 StructTys.push_back(VoidPtrTy); // const void *in
197 StructTys.push_back(VoidPtrTy); // void *out
198 StructTys.push_back(VoidPtrTy); // const void *usr
199 StructTys.push_back(SizeTy); // size_t usr_len
200 StructTys.push_back(Int32Ty); // uint32_t x
201 StructTys.push_back(Int32Ty); // uint32_t y
202 StructTys.push_back(Int32Ty); // uint32_t z
203 StructTys.push_back(Int32Ty); // uint32_t lod
204 StructTys.push_back(Int32Ty); // enum RsAllocationCubemapFace
205 StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16]
206
207 llvm::Type *ForEachStubPtrTy = llvm::StructType::create(
208 StructTys, "RsForEachStubParamStruct")->getPointerTo();
209
210 /* Create the function signature for our expanded function.
211 * void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
212 * uint32_t instep, uint32_t outstep)
213 */
214 llvm::SmallVector<llvm::Type*, 8> ParamTys;
215 ParamTys.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
216 ParamTys.push_back(Int32Ty); // uint32_t x1
217 ParamTys.push_back(Int32Ty); // uint32_t x2
218 ParamTys.push_back(Int32Ty); // uint32_t instep
219 ParamTys.push_back(Int32Ty); // uint32_t outstep
220
221 llvm::FunctionType *FT =
222 llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
223 llvm::Function *ExpandedFunc =
224 llvm::Function::Create(FT,
225 llvm::GlobalValue::ExternalLinkage,
226 F->getName() + ".expand", M);
227
228 // Create and name the actual arguments to this expanded function.
229 llvm::SmallVector<llvm::Argument*, 8> ArgVec;
230 for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
231 E = ExpandedFunc->arg_end();
232 B != E;
233 ++B) {
234 ArgVec.push_back(B);
235 }
236
237 if (ArgVec.size() != 5) {
Shih-wei Liao89e84902012-01-17 03:07:40 -0800238 ALOGE("Incorrect number of arguments to function: %zu",
239 ArgVec.size());
Stephen Hinesdb169182012-01-05 18:46:36 -0800240 return false;
241 }
242 llvm::Value *Arg_p = ArgVec[0];
243 llvm::Value *Arg_x1 = ArgVec[1];
244 llvm::Value *Arg_x2 = ArgVec[2];
245 llvm::Value *Arg_instep = ArgVec[3];
246 llvm::Value *Arg_outstep = ArgVec[4];
247
248 Arg_p->setName("p");
249 Arg_x1->setName("x1");
250 Arg_x2->setName("x2");
Stephen Hines2b040862012-07-27 20:18:08 -0700251 Arg_instep->setName("arg_instep");
252 Arg_outstep->setName("arg_outstep");
253
254 llvm::Value *InStep = NULL;
255 llvm::Value *OutStep = NULL;
Stephen Hinesdb169182012-01-05 18:46:36 -0800256
257 // Construct the actual function body.
258 llvm::BasicBlock *Begin =
259 llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
260 llvm::IRBuilder<> Builder(Begin);
261
262 // uint32_t X = x1;
263 llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
264 Builder.CreateStore(Arg_x1, AX);
265
Stephen Hinescc366e52012-02-21 17:22:04 -0800266 // Collect and construct the arguments for the kernel().
Stephen Hinesdb169182012-01-05 18:46:36 -0800267 // Note that we load any loop-invariant arguments before entering the Loop.
268 llvm::Function::arg_iterator Args = F->arg_begin();
269
270 llvm::Type *InTy = NULL;
271 llvm::AllocaInst *AIn = NULL;
272 if (hasIn(Signature)) {
273 InTy = Args->getType();
274 AIn = Builder.CreateAlloca(InTy, 0, "AIn");
Stephen Hines2b040862012-07-27 20:18:08 -0700275 InStep = getStepValue(&TD, InTy, Arg_instep);
276 InStep->setName("instep");
Stephen Hinesdb169182012-01-05 18:46:36 -0800277 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
278 Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
279 Args++;
280 }
281
282 llvm::Type *OutTy = NULL;
283 llvm::AllocaInst *AOut = NULL;
284 if (hasOut(Signature)) {
285 OutTy = Args->getType();
286 AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
Stephen Hines2b040862012-07-27 20:18:08 -0700287 OutStep = getStepValue(&TD, OutTy, Arg_outstep);
288 OutStep->setName("outstep");
Stephen Hinesdb169182012-01-05 18:46:36 -0800289 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
290 Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
291 Args++;
292 }
293
294 llvm::Value *UsrData = NULL;
295 if (hasUsrData(Signature)) {
296 llvm::Type *UsrDataTy = Args->getType();
297 UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
298 Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
299 UsrData->setName("UsrData");
300 Args++;
301 }
302
303 if (hasX(Signature)) {
304 Args++;
305 }
306
307 llvm::Value *Y = NULL;
308 if (hasY(Signature)) {
309 Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
310 Args++;
311 }
312
Stephen Hines6e9e89d2012-07-27 19:16:04 -0700313 bccAssert(Args == F->arg_end());
Stephen Hinesdb169182012-01-05 18:46:36 -0800314
315 llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
316 llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
317
318 // if (x1 < x2) goto Loop; else goto Exit;
319 llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
320 Builder.CreateCondBr(Cond, Loop, Exit);
321
322 // Loop:
323 Builder.SetInsertPoint(Loop);
324
Stephen Hinescc366e52012-02-21 17:22:04 -0800325 // Populate the actual call to kernel().
Stephen Hinesdb169182012-01-05 18:46:36 -0800326 llvm::SmallVector<llvm::Value*, 8> RootArgs;
327
Stephen Hines7ae3a822012-09-14 19:24:58 -0700328 llvm::Value *InPtr = NULL;
329 llvm::Value *OutPtr = NULL;
Stephen Hinesdb169182012-01-05 18:46:36 -0800330
331 if (AIn) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700332 InPtr = Builder.CreateLoad(AIn, "InPtr");
333 RootArgs.push_back(InPtr);
Stephen Hinesdb169182012-01-05 18:46:36 -0800334 }
335
336 if (AOut) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700337 OutPtr = Builder.CreateLoad(AOut, "OutPtr");
338 RootArgs.push_back(OutPtr);
Stephen Hinesdb169182012-01-05 18:46:36 -0800339 }
340
341 if (UsrData) {
342 RootArgs.push_back(UsrData);
343 }
344
345 // We always have to load X, since it is used to iterate through the loop.
346 llvm::Value *X = Builder.CreateLoad(AX, "X");
347 if (hasX(Signature)) {
348 RootArgs.push_back(X);
349 }
350
351 if (Y) {
352 RootArgs.push_back(Y);
353 }
354
355 Builder.CreateCall(F, RootArgs);
356
Stephen Hines7ae3a822012-09-14 19:24:58 -0700357 if (InPtr) {
358 // InPtr += instep
Stephen Hinesdb169182012-01-05 18:46:36 -0800359 llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
Stephen Hines7ae3a822012-09-14 19:24:58 -0700360 Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy);
Stephen Hinesdb169182012-01-05 18:46:36 -0800361 Builder.CreateStore(NewIn, AIn);
362 }
363
Stephen Hines7ae3a822012-09-14 19:24:58 -0700364 if (OutPtr) {
365 // OutPtr += outstep
Stephen Hinesdb169182012-01-05 18:46:36 -0800366 llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
Stephen Hines7ae3a822012-09-14 19:24:58 -0700367 Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy);
368 Builder.CreateStore(NewOut, AOut);
369 }
370
371 // X++;
372 llvm::Value *XPlusOne =
373 Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
374 Builder.CreateStore(XPlusOne, AX);
375
376 // If (X < x2) goto Loop; else goto Exit;
377 Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
378 Builder.CreateCondBr(Cond, Loop, Exit);
379
380 // Exit:
381 Builder.SetInsertPoint(Exit);
382 Builder.CreateRetVoid();
383
384 return true;
385 }
386
387 /* Expand a pass-by-value kernel.
388 */
389 bool ExpandKernel(llvm::Function *F, uint32_t Signature) {
390 bccAssert(isKernel(Signature));
391 ALOGV("Expanding kernel Function %s", F->getName().str().c_str());
392
393 // TODO: Refactor this to share functionality with ExpandFunction.
394 llvm::TargetData TD(M);
395
396 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
397 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
398 llvm::Type *SizeTy = Int32Ty;
399
400 /* Defined in frameworks/base/libs/rs/rs_hal.h:
401 *
402 * struct RsForEachStubParamStruct {
403 * const void *in;
404 * void *out;
405 * const void *usr;
406 * size_t usr_len;
407 * uint32_t x;
408 * uint32_t y;
409 * uint32_t z;
410 * uint32_t lod;
411 * enum RsAllocationCubemapFace face;
412 * uint32_t ar[16];
413 * };
414 */
415 llvm::SmallVector<llvm::Type*, 9> StructTys;
416 StructTys.push_back(VoidPtrTy); // const void *in
417 StructTys.push_back(VoidPtrTy); // void *out
418 StructTys.push_back(VoidPtrTy); // const void *usr
419 StructTys.push_back(SizeTy); // size_t usr_len
420 StructTys.push_back(Int32Ty); // uint32_t x
421 StructTys.push_back(Int32Ty); // uint32_t y
422 StructTys.push_back(Int32Ty); // uint32_t z
423 StructTys.push_back(Int32Ty); // uint32_t lod
424 StructTys.push_back(Int32Ty); // enum RsAllocationCubemapFace
425 StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16]
426
427 llvm::Type *ForEachStubPtrTy = llvm::StructType::create(
428 StructTys, "RsForEachStubParamStruct")->getPointerTo();
429
430 /* Create the function signature for our expanded function.
431 * void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
432 * uint32_t instep, uint32_t outstep)
433 */
434 llvm::SmallVector<llvm::Type*, 8> ParamTys;
435 ParamTys.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
436 ParamTys.push_back(Int32Ty); // uint32_t x1
437 ParamTys.push_back(Int32Ty); // uint32_t x2
438 ParamTys.push_back(Int32Ty); // uint32_t instep
439 ParamTys.push_back(Int32Ty); // uint32_t outstep
440
441 llvm::FunctionType *FT =
442 llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
443 llvm::Function *ExpandedFunc =
444 llvm::Function::Create(FT,
445 llvm::GlobalValue::ExternalLinkage,
446 F->getName() + ".expand", M);
447
448 // Create and name the actual arguments to this expanded function.
449 llvm::SmallVector<llvm::Argument*, 8> ArgVec;
450 for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
451 E = ExpandedFunc->arg_end();
452 B != E;
453 ++B) {
454 ArgVec.push_back(B);
455 }
456
457 if (ArgVec.size() != 5) {
458 ALOGE("Incorrect number of arguments to function: %zu",
459 ArgVec.size());
460 return false;
461 }
462 llvm::Value *Arg_p = ArgVec[0];
463 llvm::Value *Arg_x1 = ArgVec[1];
464 llvm::Value *Arg_x2 = ArgVec[2];
465 llvm::Value *Arg_instep = ArgVec[3];
466 llvm::Value *Arg_outstep = ArgVec[4];
467
468 Arg_p->setName("p");
469 Arg_x1->setName("x1");
470 Arg_x2->setName("x2");
471 Arg_instep->setName("arg_instep");
472 Arg_outstep->setName("arg_outstep");
473
474 llvm::Value *InStep = NULL;
475 llvm::Value *OutStep = NULL;
476
477 // Construct the actual function body.
478 llvm::BasicBlock *Begin =
479 llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
480 llvm::IRBuilder<> Builder(Begin);
481
482 // uint32_t X = x1;
483 llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
484 Builder.CreateStore(Arg_x1, AX);
485
486 // Collect and construct the arguments for the kernel().
487 // Note that we load any loop-invariant arguments before entering the Loop.
488 llvm::Function::arg_iterator Args = F->arg_begin();
489
490 llvm::Type *InBaseTy = NULL;
491 llvm::Type *InTy = NULL;
492 llvm::AllocaInst *AIn = NULL;
493 if (hasIn(Signature)) {
494 InBaseTy = Args->getType();
495 InTy =InBaseTy->getPointerTo();
496 AIn = Builder.CreateAlloca(InTy, 0, "AIn");
497 InStep = getStepValue(&TD, InTy, Arg_instep);
498 InStep->setName("instep");
499 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
500 Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
501 Args++;
502 }
503
504 llvm::Type *OutBaseTy = NULL;
505 llvm::Type *OutTy = NULL;
506 llvm::AllocaInst *AOut = NULL;
507 if (hasOut(Signature)) {
508 OutBaseTy = F->getReturnType();
509 OutTy = OutBaseTy->getPointerTo();
510 AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
511 OutStep = getStepValue(&TD, OutTy, Arg_outstep);
512 OutStep->setName("outstep");
513 Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
514 Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
515 // We don't increment Args, since we are using the actual return type.
516 }
517
518 // No usrData parameter on kernels.
519 bccAssert(!hasUsrData(Signature));
520
521 if (hasX(Signature)) {
522 Args++;
523 }
524
525 llvm::Value *Y = NULL;
526 if (hasY(Signature)) {
527 Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
528 Args++;
529 }
530
531 bccAssert(Args == F->arg_end());
532
533 llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
534 llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
535
536 // if (x1 < x2) goto Loop; else goto Exit;
537 llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
538 Builder.CreateCondBr(Cond, Loop, Exit);
539
540 // Loop:
541 Builder.SetInsertPoint(Loop);
542
543 // Populate the actual call to kernel().
544 llvm::SmallVector<llvm::Value*, 8> RootArgs;
545
546 llvm::Value *InPtr = NULL;
547 llvm::Value *In = NULL;
548 llvm::Value *OutPtr = NULL;
549
550 if (AIn) {
551 InPtr = Builder.CreateLoad(AIn, "InPtr");
552 In = Builder.CreateLoad(InPtr, "In");
553 RootArgs.push_back(In);
554 }
555
556 // We always have to load X, since it is used to iterate through the loop.
557 llvm::Value *X = Builder.CreateLoad(AX, "X");
558 if (hasX(Signature)) {
559 RootArgs.push_back(X);
560 }
561
562 if (Y) {
563 RootArgs.push_back(Y);
564 }
565
566 llvm::Value *RetVal = Builder.CreateCall(F, RootArgs);
567
568 if (AOut) {
569 OutPtr = Builder.CreateLoad(AOut, "OutPtr");
570 Builder.CreateStore(RetVal, OutPtr);
571 }
572
573 if (InPtr) {
574 // InPtr += instep
575 llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
576 Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy);
577 Builder.CreateStore(NewIn, AIn);
578 }
579
580 if (OutPtr) {
581 // OutPtr += outstep
582 llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
583 Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy);
Stephen Hinesdb169182012-01-05 18:46:36 -0800584 Builder.CreateStore(NewOut, AOut);
585 }
586
587 // X++;
588 llvm::Value *XPlusOne =
589 Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
590 Builder.CreateStore(XPlusOne, AX);
591
592 // If (X < x2) goto Loop; else goto Exit;
593 Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
594 Builder.CreateCondBr(Cond, Loop, Exit);
595
596 // Exit:
597 Builder.SetInsertPoint(Exit);
598 Builder.CreateRetVoid();
599
600 return true;
601 }
602
603 virtual bool runOnModule(llvm::Module &M) {
Stephen Hinescc366e52012-02-21 17:22:04 -0800604 bool Changed = false;
Stephen Hinesdb169182012-01-05 18:46:36 -0800605 this->M = &M;
606 C = &M.getContext();
607
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700608 for (RSInfo::ExportForeachFuncListTy::const_iterator
609 func_iter = mFuncs.begin(), func_end = mFuncs.end();
610 func_iter != func_end; func_iter++) {
611 const char *name = func_iter->first;
612 uint32_t signature = func_iter->second;
613 llvm::Function *kernel = M.getFunction(name);
Stephen Hines7ae3a822012-09-14 19:24:58 -0700614 if (kernel && isKernel(signature)) {
615 Changed |= ExpandKernel(kernel, signature);
616 }
617 else if (kernel && kernel->getReturnType()->isVoidTy()) {
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700618 Changed |= ExpandFunction(kernel, signature);
Stephen Hinescc366e52012-02-21 17:22:04 -0800619 }
Stephen Hinesdb169182012-01-05 18:46:36 -0800620 }
621
Stephen Hinescc366e52012-02-21 17:22:04 -0800622 return Changed;
Stephen Hinesdb169182012-01-05 18:46:36 -0800623 }
624
625 virtual const char *getPassName() const {
626 return "ForEach-able Function Expansion";
627 }
628
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700629}; // end RSForEachExpandPass
Stephen Hinesdb169182012-01-05 18:46:36 -0800630
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700631} // end anonymous namespace
632
// Out-of-class definition of the pass identifier that the constructor hands
// to llvm::ModulePass.
char RSForEachExpandPass::ID = 0;
Stephen Hinesdb169182012-01-05 18:46:36 -0800634
635namespace bcc {
636
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700637llvm::ModulePass *
Stephen Hines2b040862012-07-27 20:18:08 -0700638createRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
639 bool pEnableStepOpt){
640 return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt);
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700641}
Stephen Hinesdb169182012-01-05 18:46:36 -0800642
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700643} // end namespace bcc