blob: c9d3ef07f713f049d678d522c9d2b374919470d7 [file] [log] [blame]
Stephen Hinesdb169182012-01-05 18:46:36 -08001/*
2 * Copyright 2012, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Stephen Hines6e9e89d2012-07-27 19:16:04 -070017#include "bcc/Assert.h"
Stephen Hinese198abe2012-07-27 18:05:41 -070018#include "bcc/Renderscript/RSTransforms.h"
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070019
20#include <cstdlib>
21
Stephen Hinesb730e232013-01-09 15:31:36 -080022#include <llvm/IR/DerivedTypes.h>
23#include <llvm/IR/Function.h>
24#include <llvm/IR/Instructions.h>
25#include <llvm/IR/IRBuilder.h>
26#include <llvm/IR/Module.h>
Zonr Changc72c4dd2012-04-12 15:38:53 +080027#include <llvm/Pass.h>
Stephen Hines7ae3a822012-09-14 19:24:58 -070028#include <llvm/Support/raw_ostream.h>
Stephen Hinesb730e232013-01-09 15:31:36 -080029#include <llvm/IR/DataLayout.h>
Tobias Grossercd5b6572013-07-01 15:04:07 -070030#include <llvm/IR/Function.h>
Stephen Hinesb730e232013-01-09 15:31:36 -080031#include <llvm/IR/Type.h>
Tobias Grosser806075b2013-06-20 17:08:35 -070032#include <llvm/Transforms/Utils/BasicBlockUtils.h>
Stephen Hinesdb169182012-01-05 18:46:36 -080033
Zonr Changc72c4dd2012-04-12 15:38:53 +080034#include "bcc/Config/Config.h"
Stephen Hinese198abe2012-07-27 18:05:41 -070035#include "bcc/Renderscript/RSInfo.h"
Zonr Changef73a242012-04-12 16:44:01 +080036#include "bcc/Support/Log.h"
Stephen Hinesdb169182012-01-05 18:46:36 -080037
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070038using namespace bcc;
39
Stephen Hinesdb169182012-01-05 18:46:36 -080040namespace {
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070041
42/* RSForEachExpandPass - This pass operates on functions that are able to be
43 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
44 * ForEach-able function to be invoked over the appropriate data cells of the
45 * input/output allocations (adjusting other relevant parameters as we go). We
46 * support doing this for any ForEach-able compute kernels. The new function
47 * name is the original function name followed by ".expand". Note that we
48 * still generate code for the original function.
49 */
50class RSForEachExpandPass : public llvm::ModulePass {
51private:
Stephen Hinesdb169182012-01-05 18:46:36 -080052 static char ID;
53
54 llvm::Module *M;
55 llvm::LLVMContext *C;
56
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070057 const RSInfo::ExportForeachFuncListTy &mFuncs;
Stephen Hinescc366e52012-02-21 17:22:04 -080058
Stephen Hines2b040862012-07-27 20:18:08 -070059 // Turns on optimization of allocation stride values.
60 bool mEnableStepOpt;
61
Stephen Hinescc366e52012-02-21 17:22:04 -080062 uint32_t getRootSignature(llvm::Function *F) {
Stephen Hinesdb169182012-01-05 18:46:36 -080063 const llvm::NamedMDNode *ExportForEachMetadata =
64 M->getNamedMetadata("#rs_export_foreach");
65
66 if (!ExportForEachMetadata) {
67 llvm::SmallVector<llvm::Type*, 8> RootArgTys;
68 for (llvm::Function::arg_iterator B = F->arg_begin(),
69 E = F->arg_end();
70 B != E;
71 ++B) {
72 RootArgTys.push_back(B->getType());
73 }
74
75 // For pre-ICS bitcode, we may not have signature information. In that
76 // case, we use the size of the RootArgTys to select the number of
77 // arguments.
78 return (1 << RootArgTys.size()) - 1;
79 }
80
Stephen Hines7ae3a822012-09-14 19:24:58 -070081 if (ExportForEachMetadata->getNumOperands() == 0) {
82 return 0;
83 }
84
Stephen Hines6e9e89d2012-07-27 19:16:04 -070085 bccAssert(ExportForEachMetadata->getNumOperands() > 0);
Stephen Hinesdb169182012-01-05 18:46:36 -080086
Stephen Hinescc366e52012-02-21 17:22:04 -080087 // We only handle the case for legacy root() functions here, so this is
88 // hard-coded to look at only the first such function.
Stephen Hinesdb169182012-01-05 18:46:36 -080089 llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
90 if (SigNode != NULL && SigNode->getNumOperands() == 1) {
91 llvm::Value *SigVal = SigNode->getOperand(0);
92 if (SigVal->getValueID() == llvm::Value::MDStringVal) {
93 llvm::StringRef SigString =
94 static_cast<llvm::MDString*>(SigVal)->getString();
95 uint32_t Signature = 0;
96 if (SigString.getAsInteger(10, Signature)) {
97 ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
98 return 0;
99 }
100 return Signature;
101 }
102 }
103
104 return 0;
105 }
106
Stephen Hines2b040862012-07-27 20:18:08 -0700107 // Get the actual value we should use to step through an allocation.
Tobias Grosser7b662902013-06-21 17:07:39 -0700108 //
109 // Normally the value we use to step through an allocation is given to us by
110 // the driver. However, for certain primitive data types, we can derive an
111 // integer constant for the step value. We use this integer constant whenever
112 // possible to allow further compiler optimizations to take place.
113 //
Stephen Hinesb730e232013-01-09 15:31:36 -0800114 // DL - Target Data size/layout information.
Stephen Hines2b040862012-07-27 20:18:08 -0700115 // T - Type of allocation (should be a pointer).
116 // OrigStep - Original step increment (root.expand() input from driver).
Stephen Hinesb730e232013-01-09 15:31:36 -0800117 llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *T,
Stephen Hines2b040862012-07-27 20:18:08 -0700118 llvm::Value *OrigStep) {
Stephen Hinesb730e232013-01-09 15:31:36 -0800119 bccAssert(DL);
Stephen Hines2b040862012-07-27 20:18:08 -0700120 bccAssert(T);
121 bccAssert(OrigStep);
122 llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T);
123 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
124 if (mEnableStepOpt && T != VoidPtrTy && PT) {
125 llvm::Type *ET = PT->getElementType();
Stephen Hinesb730e232013-01-09 15:31:36 -0800126 uint64_t ETSize = DL->getTypeAllocSize(ET);
Stephen Hines2b040862012-07-27 20:18:08 -0700127 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
128 return llvm::ConstantInt::get(Int32Ty, ETSize);
129 } else {
130 return OrigStep;
131 }
132 }
133
// True when bit 0x01 of the signature is set, i.e. the function takes an
// input parameter.
static bool hasIn(uint32_t Signature) {
  return (Signature & 0x01) != 0;
}
137
// True when bit 0x02 of the signature is set, i.e. the function produces an
// output.
static bool hasOut(uint32_t Signature) {
  return (Signature & 0x02) != 0;
}
141
// True when bit 0x04 of the signature is set, i.e. the function takes a
// user-data parameter.
static bool hasUsrData(uint32_t Signature) {
  return (Signature & 0x04) != 0;
}
145
// True when bit 0x08 of the signature is set, i.e. the function takes the
// x coordinate as a parameter.
static bool hasX(uint32_t Signature) {
  return (Signature & 0x08) != 0;
}
149
// True when bit 0x10 of the signature is set, i.e. the function takes the
// y coordinate as a parameter.
static bool hasY(uint32_t Signature) {
  return (Signature & 0x10) != 0;
}
153
// True when bit 0x20 of the signature is set, which marks the function as a
// pass-by-value kernel (expanded by ExpandKernel rather than
// ExpandFunction).
static bool isKernel(uint32_t Signature) {
  return (Signature & 0x20) != 0;
}
157
Tobias Grosser8ae46072013-06-20 14:00:31 -0700158 /// @brief Returns the type of the ForEach stub parameter structure.
159 ///
160 /// Renderscript uses a single structure in which all parameters are passed
161 /// to keep the signature of the expanded function independent of the
162 /// parameters passed to it.
163 llvm::Type *getForeachStubTy() {
Stephen Hinesdb169182012-01-05 18:46:36 -0800164 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
165 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
166 llvm::Type *SizeTy = Int32Ty;
Stephen Hinesdb169182012-01-05 18:46:36 -0800167 /* Defined in frameworks/base/libs/rs/rs_hal.h:
168 *
169 * struct RsForEachStubParamStruct {
170 * const void *in;
171 * void *out;
172 * const void *usr;
173 * size_t usr_len;
174 * uint32_t x;
175 * uint32_t y;
176 * uint32_t z;
177 * uint32_t lod;
178 * enum RsAllocationCubemapFace face;
179 * uint32_t ar[16];
180 * };
181 */
182 llvm::SmallVector<llvm::Type*, 9> StructTys;
183 StructTys.push_back(VoidPtrTy); // const void *in
184 StructTys.push_back(VoidPtrTy); // void *out
185 StructTys.push_back(VoidPtrTy); // const void *usr
186 StructTys.push_back(SizeTy); // size_t usr_len
187 StructTys.push_back(Int32Ty); // uint32_t x
188 StructTys.push_back(Int32Ty); // uint32_t y
189 StructTys.push_back(Int32Ty); // uint32_t z
190 StructTys.push_back(Int32Ty); // uint32_t lod
191 StructTys.push_back(Int32Ty); // enum RsAllocationCubemapFace
192 StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16]
193
Tobias Grosser8ae46072013-06-20 14:00:31 -0700194 return llvm::StructType::create(StructTys, "RsForEachStubParamStruct");
195 }
196
Tobias Grosser357b5862013-06-20 14:12:46 -0700197 /// @brief Create skeleton of the expanded function.
198 ///
199 /// This creates a function with the following signature:
200 ///
201 /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
202 /// uint32_t instep, uint32_t outstep)
203 ///
204 llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
205 llvm::Type *ForEachStubPtrTy = getForeachStubTy()->getPointerTo();
206 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
207
208 llvm::SmallVector<llvm::Type*, 8> ParamTys;
209 ParamTys.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
210 ParamTys.push_back(Int32Ty); // uint32_t x1
211 ParamTys.push_back(Int32Ty); // uint32_t x2
212 ParamTys.push_back(Int32Ty); // uint32_t instep
213 ParamTys.push_back(Int32Ty); // uint32_t outstep
214
215 llvm::FunctionType *FT =
216 llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
Tobias Grosser802f6592013-06-20 14:27:16 -0700217 llvm::Function *F =
218 llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage,
219 OldName + ".expand", M);
220
221 llvm::Function::arg_iterator AI = F->arg_begin();
222
223 AI->setName("p");
224 AI++;
225 AI->setName("x1");
226 AI++;
227 AI->setName("x2");
228 AI++;
229 AI->setName("arg_instep");
230 AI++;
231 AI->setName("arg_outstep");
232 AI++;
233
234 assert(AI == F->arg_end());
235
Tobias Grosser806075b2013-06-20 17:08:35 -0700236 llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*C, "Begin", F);
237 llvm::IRBuilder<> Builder(Begin);
238 Builder.CreateRetVoid();
239
Tobias Grosser802f6592013-06-20 14:27:16 -0700240 return F;
Tobias Grosser357b5862013-06-20 14:12:46 -0700241 }
242
Tobias Grossere4a73f62013-06-21 15:35:03 -0700243 /// @brief Create an empty loop
244 ///
245 /// Create a loop of the form:
246 ///
247 /// for (i = LowerBound; i < UpperBound; i++)
248 /// ;
249 ///
250 /// After the loop has been created, the builder is set such that
251 /// instructions can be added to the loop body.
252 ///
253 /// @param Builder The builder to use to build this loop. The current
254 /// position of the builder is the position the loop
255 /// will be inserted.
256 /// @param LowerBound The first value of the loop iterator
257 /// @param UpperBound The maximal value of the loop iterator
258 /// @param LoopIV A reference that will be set to the loop iterator.
259 /// @return The BasicBlock that will be executed after the loop.
260 llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
261 llvm::Value *LowerBound,
262 llvm::Value *UpperBound,
263 llvm::PHINode **LoopIV) {
264 assert(LowerBound->getType() == UpperBound->getType());
265
266 llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
267 llvm::Value *Cond, *IVNext;
268 llvm::PHINode *IV;
269
270 CondBB = Builder.GetInsertBlock();
271 AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this);
272 HeaderBB = llvm::BasicBlock::Create(*C, "Loop", CondBB->getParent());
273
274 // if (LowerBound < Upperbound)
275 // goto LoopHeader
276 // else
277 // goto AfterBB
278 CondBB->getTerminator()->eraseFromParent();
279 Builder.SetInsertPoint(CondBB);
Tobias Grossere87a0512013-06-25 15:31:11 -0700280 Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
Tobias Grossere4a73f62013-06-21 15:35:03 -0700281 Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
282
283 // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
284 // iv.next = iv + 1
285 // if (iv.next < Upperbound)
286 // goto LoopHeader
287 // else
288 // goto AfterBB
289 Builder.SetInsertPoint(HeaderBB);
290 IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
291 IV->addIncoming(LowerBound, CondBB);
292 IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
293 IV->addIncoming(IVNext, HeaderBB);
Tobias Grossere87a0512013-06-25 15:31:11 -0700294 Cond = Builder.CreateICmpULT(IVNext, UpperBound);
Tobias Grossere4a73f62013-06-21 15:35:03 -0700295 Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
296 AfterBB->setName("Exit");
297 Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
298 *LoopIV = IV;
299 return AfterBB;
300 }
301
Tobias Grosser8ae46072013-06-20 14:00:31 -0700302public:
303 RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
304 bool pEnableStepOpt)
305 : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs),
306 mEnableStepOpt(pEnableStepOpt) {
307 }
308
309 /* Performs the actual optimization on a selected function. On success, the
310 * Module will contain a new function of the name "<NAME>.expand" that
311 * invokes <NAME>() in a loop with the appropriate parameters.
312 */
313 bool ExpandFunction(llvm::Function *F, uint32_t Signature) {
314 ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str());
315
316 if (!Signature) {
317 Signature = getRootSignature(F);
318 if (!Signature) {
319 // We couldn't determine how to expand this function based on its
320 // function signature.
321 return false;
322 }
323 }
324
325 llvm::DataLayout DL(M);
326
Tobias Grosser357b5862013-06-20 14:12:46 -0700327 llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());
Stephen Hinesdb169182012-01-05 18:46:36 -0800328
329 // Create and name the actual arguments to this expanded function.
330 llvm::SmallVector<llvm::Argument*, 8> ArgVec;
331 for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
332 E = ExpandedFunc->arg_end();
333 B != E;
334 ++B) {
335 ArgVec.push_back(B);
336 }
337
338 if (ArgVec.size() != 5) {
Shih-wei Liao89e84902012-01-17 03:07:40 -0800339 ALOGE("Incorrect number of arguments to function: %zu",
340 ArgVec.size());
Stephen Hinesdb169182012-01-05 18:46:36 -0800341 return false;
342 }
343 llvm::Value *Arg_p = ArgVec[0];
344 llvm::Value *Arg_x1 = ArgVec[1];
345 llvm::Value *Arg_x2 = ArgVec[2];
346 llvm::Value *Arg_instep = ArgVec[3];
347 llvm::Value *Arg_outstep = ArgVec[4];
348
Stephen Hines2b040862012-07-27 20:18:08 -0700349 llvm::Value *InStep = NULL;
350 llvm::Value *OutStep = NULL;
Stephen Hinesdb169182012-01-05 18:46:36 -0800351
352 // Construct the actual function body.
Tobias Grossere4a73f62013-06-21 15:35:03 -0700353 llvm::IRBuilder<> Builder(ExpandedFunc->getEntryBlock().begin());
Stephen Hinesdb169182012-01-05 18:46:36 -0800354
Stephen Hinescc366e52012-02-21 17:22:04 -0800355 // Collect and construct the arguments for the kernel().
Stephen Hinesdb169182012-01-05 18:46:36 -0800356 // Note that we load any loop-invariant arguments before entering the Loop.
357 llvm::Function::arg_iterator Args = F->arg_begin();
358
359 llvm::Type *InTy = NULL;
Tobias Grosserae937ec2013-06-27 13:49:47 -0700360 llvm::Value *InBasePtr = NULL;
Stephen Hinesdb169182012-01-05 18:46:36 -0800361 if (hasIn(Signature)) {
362 InTy = Args->getType();
Stephen Hinesb730e232013-01-09 15:31:36 -0800363 InStep = getStepValue(&DL, InTy, Arg_instep);
Stephen Hines2b040862012-07-27 20:18:08 -0700364 InStep->setName("instep");
Tobias Grosserae937ec2013-06-27 13:49:47 -0700365 InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
Stephen Hinesdb169182012-01-05 18:46:36 -0800366 Args++;
367 }
368
369 llvm::Type *OutTy = NULL;
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700370 llvm::Value *OutBasePtr = NULL;
Stephen Hinesdb169182012-01-05 18:46:36 -0800371 if (hasOut(Signature)) {
372 OutTy = Args->getType();
Stephen Hinesb730e232013-01-09 15:31:36 -0800373 OutStep = getStepValue(&DL, OutTy, Arg_outstep);
Stephen Hines2b040862012-07-27 20:18:08 -0700374 OutStep->setName("outstep");
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700375 OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
Stephen Hinesdb169182012-01-05 18:46:36 -0800376 Args++;
377 }
378
379 llvm::Value *UsrData = NULL;
380 if (hasUsrData(Signature)) {
381 llvm::Type *UsrDataTy = Args->getType();
382 UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
383 Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
384 UsrData->setName("UsrData");
385 Args++;
386 }
387
388 if (hasX(Signature)) {
389 Args++;
390 }
391
392 llvm::Value *Y = NULL;
393 if (hasY(Signature)) {
394 Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
395 Args++;
396 }
397
Stephen Hines6e9e89d2012-07-27 19:16:04 -0700398 bccAssert(Args == F->arg_end());
Stephen Hinesdb169182012-01-05 18:46:36 -0800399
Tobias Grossere4a73f62013-06-21 15:35:03 -0700400 llvm::PHINode *IV;
401 createLoop(Builder, Arg_x1, Arg_x2, &IV);
Stephen Hinesdb169182012-01-05 18:46:36 -0800402
Stephen Hinescc366e52012-02-21 17:22:04 -0800403 // Populate the actual call to kernel().
Stephen Hinesdb169182012-01-05 18:46:36 -0800404 llvm::SmallVector<llvm::Value*, 8> RootArgs;
405
Stephen Hines7ae3a822012-09-14 19:24:58 -0700406 llvm::Value *InPtr = NULL;
407 llvm::Value *OutPtr = NULL;
Stephen Hinesdb169182012-01-05 18:46:36 -0800408
Tobias Grosserae937ec2013-06-27 13:49:47 -0700409 // Calculate the current input and output pointers
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700410 //
Tobias Grosserae937ec2013-06-27 13:49:47 -0700411 // We always calculate the input/output pointers with a GEP operating on i8
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700412 // values and only cast at the very end to OutTy. This is because the step
413 // between two values is given in bytes.
414 //
415 // TODO: We could further optimize the output by using a GEP operation of
416 // type 'OutTy' in cases where the element type of the allocation allows.
417 if (OutBasePtr) {
418 llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
419 OutOffset = Builder.CreateMul(OutOffset, OutStep);
420 OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
421 OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
422 }
Tobias Grosserae937ec2013-06-27 13:49:47 -0700423 if (InBasePtr) {
424 llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
425 InOffset = Builder.CreateMul(InOffset, InStep);
426 InPtr = Builder.CreateGEP(InBasePtr, InOffset);
427 InPtr = Builder.CreatePointerCast(InPtr, InTy);
428 }
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700429
Tobias Grosserae937ec2013-06-27 13:49:47 -0700430 if (InPtr) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700431 RootArgs.push_back(InPtr);
Stephen Hinesdb169182012-01-05 18:46:36 -0800432 }
433
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700434 if (OutPtr) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700435 RootArgs.push_back(OutPtr);
Stephen Hinesdb169182012-01-05 18:46:36 -0800436 }
437
438 if (UsrData) {
439 RootArgs.push_back(UsrData);
440 }
441
Tobias Grossere4a73f62013-06-21 15:35:03 -0700442 llvm::Value *X = IV;
Stephen Hinesdb169182012-01-05 18:46:36 -0800443 if (hasX(Signature)) {
444 RootArgs.push_back(X);
445 }
446
447 if (Y) {
448 RootArgs.push_back(Y);
449 }
450
451 Builder.CreateCall(F, RootArgs);
452
Stephen Hines7ae3a822012-09-14 19:24:58 -0700453 return true;
454 }
455
456 /* Expand a pass-by-value kernel.
457 */
458 bool ExpandKernel(llvm::Function *F, uint32_t Signature) {
459 bccAssert(isKernel(Signature));
460 ALOGV("Expanding kernel Function %s", F->getName().str().c_str());
461
462 // TODO: Refactor this to share functionality with ExpandFunction.
Stephen Hinesb730e232013-01-09 15:31:36 -0800463 llvm::DataLayout DL(M);
Stephen Hines7ae3a822012-09-14 19:24:58 -0700464
Tobias Grosser357b5862013-06-20 14:12:46 -0700465 llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());
Stephen Hines7ae3a822012-09-14 19:24:58 -0700466
467 // Create and name the actual arguments to this expanded function.
468 llvm::SmallVector<llvm::Argument*, 8> ArgVec;
469 for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
470 E = ExpandedFunc->arg_end();
471 B != E;
472 ++B) {
473 ArgVec.push_back(B);
474 }
475
476 if (ArgVec.size() != 5) {
477 ALOGE("Incorrect number of arguments to function: %zu",
478 ArgVec.size());
479 return false;
480 }
481 llvm::Value *Arg_p = ArgVec[0];
482 llvm::Value *Arg_x1 = ArgVec[1];
483 llvm::Value *Arg_x2 = ArgVec[2];
484 llvm::Value *Arg_instep = ArgVec[3];
485 llvm::Value *Arg_outstep = ArgVec[4];
486
Stephen Hines7ae3a822012-09-14 19:24:58 -0700487 llvm::Value *InStep = NULL;
488 llvm::Value *OutStep = NULL;
489
490 // Construct the actual function body.
Tobias Grossere4a73f62013-06-21 15:35:03 -0700491 llvm::IRBuilder<> Builder(ExpandedFunc->getEntryBlock().begin());
Stephen Hines7ae3a822012-09-14 19:24:58 -0700492
493 // Collect and construct the arguments for the kernel().
494 // Note that we load any loop-invariant arguments before entering the Loop.
495 llvm::Function::arg_iterator Args = F->arg_begin();
496
Stephen Hines74a4b082012-09-21 19:26:48 -0700497 llvm::Type *OutTy = NULL;
Stephen Hines74a4b082012-09-21 19:26:48 -0700498 bool PassOutByReference = false;
Tobias Grosser7b662902013-06-21 17:07:39 -0700499 llvm::Value *OutBasePtr = NULL;
Stephen Hines74a4b082012-09-21 19:26:48 -0700500 if (hasOut(Signature)) {
501 llvm::Type *OutBaseTy = F->getReturnType();
502 if (OutBaseTy->isVoidTy()) {
503 PassOutByReference = true;
504 OutTy = Args->getType();
505 Args++;
506 } else {
507 OutTy = OutBaseTy->getPointerTo();
508 // We don't increment Args, since we are using the actual return type.
509 }
Stephen Hinesb730e232013-01-09 15:31:36 -0800510 OutStep = getStepValue(&DL, OutTy, Arg_outstep);
Stephen Hines74a4b082012-09-21 19:26:48 -0700511 OutStep->setName("outstep");
Tobias Grosser7b662902013-06-21 17:07:39 -0700512 OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
Stephen Hines74a4b082012-09-21 19:26:48 -0700513 }
514
Stephen Hines7ae3a822012-09-14 19:24:58 -0700515 llvm::Type *InBaseTy = NULL;
516 llvm::Type *InTy = NULL;
Tobias Grosser4102bec2013-06-27 13:53:29 -0700517 llvm::Value *InBasePtr = NULL;
Stephen Hines7ae3a822012-09-14 19:24:58 -0700518 if (hasIn(Signature)) {
519 InBaseTy = Args->getType();
520 InTy =InBaseTy->getPointerTo();
Stephen Hinesb730e232013-01-09 15:31:36 -0800521 InStep = getStepValue(&DL, InTy, Arg_instep);
Stephen Hines7ae3a822012-09-14 19:24:58 -0700522 InStep->setName("instep");
Tobias Grosser4102bec2013-06-27 13:53:29 -0700523 InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
Stephen Hines7ae3a822012-09-14 19:24:58 -0700524 Args++;
525 }
526
Stephen Hines7ae3a822012-09-14 19:24:58 -0700527 // No usrData parameter on kernels.
528 bccAssert(!hasUsrData(Signature));
529
530 if (hasX(Signature)) {
531 Args++;
532 }
533
534 llvm::Value *Y = NULL;
535 if (hasY(Signature)) {
536 Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
537 Args++;
538 }
539
540 bccAssert(Args == F->arg_end());
541
Tobias Grossere4a73f62013-06-21 15:35:03 -0700542 llvm::PHINode *IV;
543 createLoop(Builder, Arg_x1, Arg_x2, &IV);
Stephen Hines7ae3a822012-09-14 19:24:58 -0700544
545 // Populate the actual call to kernel().
546 llvm::SmallVector<llvm::Value*, 8> RootArgs;
547
548 llvm::Value *InPtr = NULL;
Stephen Hines7ae3a822012-09-14 19:24:58 -0700549 llvm::Value *OutPtr = NULL;
550
Tobias Grosser4102bec2013-06-27 13:53:29 -0700551 // Calculate the current input and output pointers
Tobias Grosser7b662902013-06-21 17:07:39 -0700552 //
Tobias Grosser4102bec2013-06-27 13:53:29 -0700553 // We always calculate the input/output pointers with a GEP operating on i8
Tobias Grosser7b662902013-06-21 17:07:39 -0700554 // values and only cast at the very end to OutTy. This is because the step
555 // between two values is given in bytes.
556 //
557 // TODO: We could further optimize the output by using a GEP operation of
558 // type 'OutTy' in cases where the element type of the allocation allows.
559 if (OutBasePtr) {
560 llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
561 OutOffset = Builder.CreateMul(OutOffset, OutStep);
562 OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
563 OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
564 }
Tobias Grosser4102bec2013-06-27 13:53:29 -0700565 if (InBasePtr) {
566 llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
567 InOffset = Builder.CreateMul(InOffset, InStep);
568 InPtr = Builder.CreateGEP(InBasePtr, InOffset);
569 InPtr = Builder.CreatePointerCast(InPtr, InTy);
570 }
Tobias Grosser7b662902013-06-21 17:07:39 -0700571
Stephen Hines74a4b082012-09-21 19:26:48 -0700572 if (PassOutByReference) {
Stephen Hines74a4b082012-09-21 19:26:48 -0700573 RootArgs.push_back(OutPtr);
574 }
575
Tobias Grosser4102bec2013-06-27 13:53:29 -0700576 if (InPtr) {
577 llvm::Value *In = Builder.CreateLoad(InPtr, "In");
Stephen Hines7ae3a822012-09-14 19:24:58 -0700578 RootArgs.push_back(In);
579 }
580
Tobias Grossere4a73f62013-06-21 15:35:03 -0700581 llvm::Value *X = IV;
Stephen Hines7ae3a822012-09-14 19:24:58 -0700582 if (hasX(Signature)) {
583 RootArgs.push_back(X);
584 }
585
586 if (Y) {
587 RootArgs.push_back(Y);
588 }
589
590 llvm::Value *RetVal = Builder.CreateCall(F, RootArgs);
591
Tobias Grosser7b662902013-06-21 17:07:39 -0700592 if (OutPtr && !PassOutByReference) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700593 Builder.CreateStore(RetVal, OutPtr);
594 }
595
Stephen Hinesdb169182012-01-05 18:46:36 -0800596 return true;
597 }
598
599 virtual bool runOnModule(llvm::Module &M) {
Stephen Hinescc366e52012-02-21 17:22:04 -0800600 bool Changed = false;
Stephen Hinesdb169182012-01-05 18:46:36 -0800601 this->M = &M;
602 C = &M.getContext();
603
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700604 for (RSInfo::ExportForeachFuncListTy::const_iterator
605 func_iter = mFuncs.begin(), func_end = mFuncs.end();
606 func_iter != func_end; func_iter++) {
607 const char *name = func_iter->first;
608 uint32_t signature = func_iter->second;
609 llvm::Function *kernel = M.getFunction(name);
Tobias Grossercd5b6572013-07-01 15:04:07 -0700610 if (kernel) {
Tobias Grosseracde6012013-07-02 14:28:01 -0700611 if (isKernel(signature)) {
Tobias Grossercd5b6572013-07-01 15:04:07 -0700612 Changed |= ExpandKernel(kernel, signature);
Tobias Grosseracde6012013-07-02 14:28:01 -0700613 kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
614 } else if (kernel->getReturnType()->isVoidTy()) {
Tobias Grossercd5b6572013-07-01 15:04:07 -0700615 Changed |= ExpandFunction(kernel, signature);
Tobias Grosseracde6012013-07-02 14:28:01 -0700616 kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
617 } else {
618 // There are some graphics root functions that are not
619 // expanded, but that will be called directly. For those
620 // functions, we can not set the linkage to internal.
621 }
Stephen Hinescc366e52012-02-21 17:22:04 -0800622 }
Stephen Hinesdb169182012-01-05 18:46:36 -0800623 }
624
Stephen Hinescc366e52012-02-21 17:22:04 -0800625 return Changed;
Stephen Hinesdb169182012-01-05 18:46:36 -0800626 }
627
628 virtual const char *getPassName() const {
629 return "ForEach-able Function Expansion";
630 }
631
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700632}; // end RSForEachExpandPass
Stephen Hinesdb169182012-01-05 18:46:36 -0800633
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700634} // end anonymous namespace
635
636char RSForEachExpandPass::ID = 0;
Stephen Hinesdb169182012-01-05 18:46:36 -0800637
638namespace bcc {
639
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700640llvm::ModulePass *
Stephen Hines2b040862012-07-27 20:18:08 -0700641createRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
642 bool pEnableStepOpt){
643 return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt);
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700644}
Stephen Hinesdb169182012-01-05 18:46:36 -0800645
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -0700646} // end namespace bcc