blob: d06cb5b43479c91022a3d717c763ac07fd31910c [file] [log] [blame]
Stephen Hinesdb169182012-01-05 18:46:36 -08001/*
2 * Copyright 2012, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Stephen Hines6e9e89d2012-07-27 19:16:04 -070017#include "bcc/Assert.h"
Stephen Hinese198abe2012-07-27 18:05:41 -070018#include "bcc/Renderscript/RSTransforms.h"
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070019
20#include <cstdlib>
David Gross33cda5c2015-01-30 11:41:19 -080021#include <functional>
David Grosse32af522016-01-15 12:15:48 -080022#include <unordered_set>
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070023
Stephen Hinesb730e232013-01-09 15:31:36 -080024#include <llvm/IR/DerivedTypes.h>
25#include <llvm/IR/Function.h>
26#include <llvm/IR/Instructions.h>
27#include <llvm/IR/IRBuilder.h>
Tobias Grosser18a38a32013-07-26 15:03:03 -070028#include <llvm/IR/MDBuilder.h>
Stephen Hinesb730e232013-01-09 15:31:36 -080029#include <llvm/IR/Module.h>
Zonr Changc72c4dd2012-04-12 15:38:53 +080030#include <llvm/Pass.h>
Stephen Hines7ae3a822012-09-14 19:24:58 -070031#include <llvm/Support/raw_ostream.h>
Stephen Hinesb730e232013-01-09 15:31:36 -080032#include <llvm/IR/DataLayout.h>
Tobias Grossercd5b6572013-07-01 15:04:07 -070033#include <llvm/IR/Function.h>
Stephen Hinesb730e232013-01-09 15:31:36 -080034#include <llvm/IR/Type.h>
Tobias Grosser806075b2013-06-20 17:08:35 -070035#include <llvm/Transforms/Utils/BasicBlockUtils.h>
Stephen Hinesdb169182012-01-05 18:46:36 -080036
Zonr Changc72c4dd2012-04-12 15:38:53 +080037#include "bcc/Config/Config.h"
Zonr Changef73a242012-04-12 16:44:01 +080038#include "bcc/Support/Log.h"
Stephen Hinesdb169182012-01-05 18:46:36 -080039
Stephen Hinesd8817752013-08-02 17:56:51 -070040#include "bcinfo/MetadataExtractor.h"
41
#ifndef __DISABLE_ASSERTS
// Expected parameter counts of the generated ".expand" functions.
// Only used in bccAssert(), hence compiled out together with the asserts.
constexpr int kNumExpandedForeachParams = 4;
constexpr int kNumExpandedReduceParams = 3;
constexpr int kNumExpandedReduceNewAccumulatorParams = 4;
#endif

// Names for the RenderScript TBAA metadata nodes.
// NOTE(review): their uses are outside this chunk - confirm before renaming.
const char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA";
const char kRenderScriptTBAANodeName[] = "RenderScript TBAA";
Chris Wailesbdbff6e2014-06-13 13:47:19 -070051
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070052using namespace bcc;
53
Stephen Hinesdb169182012-01-05 18:46:36 -080054namespace {
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070055
// Compile-time switch, always on.
// NOTE(review): presumably gates RenderScript TBAA annotation; the uses are
// outside this chunk - confirm.
constexpr bool gEnableRsTbaa = true;
Stephen Hines9c5263e2014-02-11 15:58:48 -080057
Matt Wala4e7a5062015-07-30 16:27:51 -070058/* RSKernelExpandPass - This pass operates on functions that are able
59 * to be called via rsForEach(), "foreach_<NAME>", or
60 * "reduce_<NAME>". We create an inner loop for the function to be
61 * invoked over the appropriate data cells of the input/output
62 * allocations (adjusting other relevant parameters as we go). We
63 * support doing this for any forEach or reduce style compute
64 * kernels. The new function name is the original function name
65 * followed by ".expand". Note that we still generate code for the
66 * original function.
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070067 */
Matt Wala4e7a5062015-07-30 16:27:51 -070068class RSKernelExpandPass : public llvm::ModulePass {
David Gross33cda5c2015-01-30 11:41:19 -080069public:
Stephen Hinesdb169182012-01-05 18:46:36 -080070 static char ID;
71
David Gross33cda5c2015-01-30 11:41:19 -080072private:
David Grosse44a3522015-03-13 15:24:27 -070073 static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h
74
David Grosse32af522016-01-15 12:15:48 -080075 typedef std::unordered_set<llvm::Function *> FunctionSet;
76
// Field indices of the RsLaunchDimensions struct type created in
// buildTypes(). The order must match the order in which the fields are
// pushed there, since the values are used as GEP indices.
enum RsLaunchDimensionsField {
  RsLaunchDimensionsFieldX = 0,      // uint32_t x
  RsLaunchDimensionsFieldY = 1,      // uint32_t y
  RsLaunchDimensionsFieldZ = 2,      // uint32_t z
  RsLaunchDimensionsFieldLod = 3,    // uint32_t lod
  RsLaunchDimensionsFieldFace = 4,   // uint32_t face
  RsLaunchDimensionsFieldArray = 5,  // uint32_t array[4]

  RsLaunchDimensionsFieldCount = 6   // number of fields
};
87
// Field indices of the RsExpandKernelDriverInfoPfx struct type created in
// buildTypes(). The order must match the order in which the fields are
// pushed there, since the values are used as GEP indices.
enum RsExpandKernelDriverInfoPfxField {
  RsExpandKernelDriverInfoPfxFieldInPtr = 0,      // inPtr[RS_KERNEL_INPUT_LIMIT]
  RsExpandKernelDriverInfoPfxFieldInStride = 1,   // inStride[RS_KERNEL_INPUT_LIMIT]
  RsExpandKernelDriverInfoPfxFieldInLen = 2,      // inLen
  RsExpandKernelDriverInfoPfxFieldOutPtr = 3,     // outPtr[RS_KERNEL_INPUT_LIMIT]
  RsExpandKernelDriverInfoPfxFieldOutStride = 4,  // outStride[RS_KERNEL_INPUT_LIMIT]
  RsExpandKernelDriverInfoPfxFieldOutLen = 5,     // outLen
  RsExpandKernelDriverInfoPfxFieldDim = 6,        // dim
  RsExpandKernelDriverInfoPfxFieldCurrent = 7,    // current
  RsExpandKernelDriverInfoPfxFieldUsr = 8,        // usr
  // "UsLenr" (sic) is the index of usrLen; the spelling is kept because the
  // enumerator may be referenced from code outside this chunk.
  RsExpandKernelDriverInfoPfxFieldUsLenr = 9,     // usrLen

  RsExpandKernelDriverInfoPfxFieldCount = 10      // number of fields
};
David Gross33cda5c2015-01-30 11:41:19 -0800102
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700103 llvm::Module *Module;
104 llvm::LLVMContext *Context;
105
106 /*
Matt Wala4e7a5062015-07-30 16:27:51 -0700107 * Pointers to LLVM type information for the the function signatures
108 * for expanded functions. These must be re-calculated for each module
109 * the pass is run on.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700110 */
Matt Wala4e7a5062015-07-30 16:27:51 -0700111 llvm::FunctionType *ExpandedForEachType, *ExpandedReduceType;
David Grosse32af522016-01-15 12:15:48 -0800112 llvm::Type *RsExpandKernelDriverInfoPfxTy;
Stephen Hinesdb169182012-01-05 18:46:36 -0800113
Stephen Hines25eb5862014-05-08 18:25:50 -0700114 uint32_t mExportForEachCount;
115 const char **mExportForEachNameList;
116 const uint32_t *mExportForEachSignatureList;
Stephen Hinescc366e52012-02-21 17:22:04 -0800117
Matt Wala4e7a5062015-07-30 16:27:51 -0700118 uint32_t mExportReduceCount;
119 const char **mExportReduceNameList;
120
Stephen Hines2b040862012-07-27 20:18:08 -0700121 // Turns on optimization of allocation stride values.
122 bool mEnableStepOpt;
123
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700124 uint32_t getRootSignature(llvm::Function *Function) {
Stephen Hinesdb169182012-01-05 18:46:36 -0800125 const llvm::NamedMDNode *ExportForEachMetadata =
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700126 Module->getNamedMetadata("#rs_export_foreach");
Stephen Hinesdb169182012-01-05 18:46:36 -0800127
128 if (!ExportForEachMetadata) {
129 llvm::SmallVector<llvm::Type*, 8> RootArgTys;
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700130 for (llvm::Function::arg_iterator B = Function->arg_begin(),
131 E = Function->arg_end();
Stephen Hinesdb169182012-01-05 18:46:36 -0800132 B != E;
133 ++B) {
134 RootArgTys.push_back(B->getType());
135 }
136
137 // For pre-ICS bitcode, we may not have signature information. In that
138 // case, we use the size of the RootArgTys to select the number of
139 // arguments.
140 return (1 << RootArgTys.size()) - 1;
141 }
142
Stephen Hines7ae3a822012-09-14 19:24:58 -0700143 if (ExportForEachMetadata->getNumOperands() == 0) {
144 return 0;
145 }
146
Stephen Hines6e9e89d2012-07-27 19:16:04 -0700147 bccAssert(ExportForEachMetadata->getNumOperands() > 0);
Stephen Hinesdb169182012-01-05 18:46:36 -0800148
Stephen Hinescc366e52012-02-21 17:22:04 -0800149 // We only handle the case for legacy root() functions here, so this is
150 // hard-coded to look at only the first such function.
Stephen Hinesdb169182012-01-05 18:46:36 -0800151 llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
Chris Wailes900c6c12014-08-13 15:40:00 -0700152 if (SigNode != nullptr && SigNode->getNumOperands() == 1) {
Stephen Hines1bd9f622015-03-18 14:53:10 -0700153 llvm::Metadata *SigMD = SigNode->getOperand(0);
154 if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) {
155 llvm::StringRef SigString = SigS->getString();
Stephen Hinesdb169182012-01-05 18:46:36 -0800156 uint32_t Signature = 0;
157 if (SigString.getAsInteger(10, Signature)) {
158 ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
159 return 0;
160 }
161 return Signature;
162 }
163 }
164
165 return 0;
166 }
167
Tim Murray429d94a2014-10-30 15:34:01 -0700168 bool isStepOptSupported(llvm::Type *AllocType) {
169
170 llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
171 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
172
173 if (mEnableStepOpt) {
174 return false;
175 }
176
177 if (AllocType == VoidPtrTy) {
178 return false;
179 }
180
181 if (!PT) {
182 return false;
183 }
184
185 // remaining conditions are 64-bit only
186 if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
187 return true;
188 }
189
190 // coerce suggests an upconverted struct type, which we can't support
191 if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
192 return false;
193 }
194
195 // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported
196 llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
197 llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
198 if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
199 return false;
200 }
201
202 return true;
203 }
204
Stephen Hines2b040862012-07-27 20:18:08 -0700205 // Get the actual value we should use to step through an allocation.
Tobias Grosser7b662902013-06-21 17:07:39 -0700206 //
207 // Normally the value we use to step through an allocation is given to us by
208 // the driver. However, for certain primitive data types, we can derive an
209 // integer constant for the step value. We use this integer constant whenever
210 // possible to allow further compiler optimizations to take place.
211 //
Stephen Hinesb730e232013-01-09 15:31:36 -0800212 // DL - Target Data size/layout information.
Stephen Hines2b040862012-07-27 20:18:08 -0700213 // T - Type of allocation (should be a pointer).
214 // OrigStep - Original step increment (root.expand() input from driver).
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700215 llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
Stephen Hines2b040862012-07-27 20:18:08 -0700216 llvm::Value *OrigStep) {
Stephen Hinesb730e232013-01-09 15:31:36 -0800217 bccAssert(DL);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700218 bccAssert(AllocType);
Stephen Hines2b040862012-07-27 20:18:08 -0700219 bccAssert(OrigStep);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700220 llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
Tim Murray429d94a2014-10-30 15:34:01 -0700221 if (isStepOptSupported(AllocType)) {
Stephen Hines2b040862012-07-27 20:18:08 -0700222 llvm::Type *ET = PT->getElementType();
Stephen Hinesb730e232013-01-09 15:31:36 -0800223 uint64_t ETSize = DL->getTypeAllocSize(ET);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700224 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
Stephen Hines2b040862012-07-27 20:18:08 -0700225 return llvm::ConstantInt::get(Int32Ty, ETSize);
226 } else {
227 return OrigStep;
228 }
229 }
230
Chris Wailes097ca142014-07-08 15:57:12 -0700231 /// Builds the types required by the pass for the given context.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700232 void buildTypes(void) {
David Grosse44a3522015-03-13 15:24:27 -0700233 // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700234
David Grosse44a3522015-03-13 15:24:27 -0700235 llvm::Type *Int8Ty = llvm::Type::getInt8Ty(*Context);
236 llvm::Type *Int8PtrTy = Int8Ty->getPointerTo();
237 llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT);
238 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
239 llvm::Type *Int32ArrayInputLimitTy = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT);
240 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
241 llvm::Type *Int32Array4Ty = llvm::ArrayType::get(Int32Ty, 4);
Chris Wailes097ca142014-07-08 15:57:12 -0700242
243 /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
Stephen Hinesdb169182012-01-05 18:46:36 -0800244 *
David Grosse44a3522015-03-13 15:24:27 -0700245 * struct RsLaunchDimensions {
246 * uint32_t x;
Stephen Hinesdb169182012-01-05 18:46:36 -0800247 * uint32_t y;
248 * uint32_t z;
David Grosse44a3522015-03-13 15:24:27 -0700249 * uint32_t lod;
250 * uint32_t face;
251 * uint32_t array[4];
Stephen Hinesdb169182012-01-05 18:46:36 -0800252 * };
253 */
David Grosse44a3522015-03-13 15:24:27 -0700254 llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes;
255 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t x
256 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t y
257 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t z
258 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t lod
259 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t face
260 RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4]
261 llvm::StructType *RsLaunchDimensionsTy =
262 llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions");
Chris Wailes881cda42014-06-23 11:27:41 -0700263
David Gross1d93a192015-03-25 14:59:27 -0700264 /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h:
David Grosse44a3522015-03-13 15:24:27 -0700265 *
266 * struct RsExpandKernelDriverInfoPfx {
267 * const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
268 * uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
269 * uint32_t inLen;
270 *
271 * uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
272 * uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
273 * uint32_t outLen;
274 *
275 * // Dimension of the launch
276 * RsLaunchDimensions dim;
277 *
278 * // The walking iterator of the launch
279 * RsLaunchDimensions current;
280 *
281 * const void *usr;
282 * uint32_t usrLen;
283 *
284 * // Items below this line are not used by the compiler and can be change in the driver.
285 * // So the compiler must assume there are an unknown number of fields of unknown type
286 * // beginning here.
287 * };
David Gross1d93a192015-03-25 14:59:27 -0700288 *
289 * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp).
David Grosse44a3522015-03-13 15:24:27 -0700290 */
291 llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes;
292 RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]
293 RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t inStride[RS_KERNEL_INPUT_LIMIT]
294 RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t inLen
295 RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]
296 RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t outStride[RS_KERNEL_INPUT_LIMIT]
297 RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t outLen
298 RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions dim
299 RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions current
300 RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy); // const void *usr
301 RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t usrLen
David Grosse32af522016-01-15 12:15:48 -0800302 RsExpandKernelDriverInfoPfxTy =
David Grosse44a3522015-03-13 15:24:27 -0700303 llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx");
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700304
305 // Create the function type for expanded kernels.
Matt Wala4e7a5062015-07-30 16:27:51 -0700306 llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700307
David Grosse44a3522015-03-13 15:24:27 -0700308 llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo();
Matt Wala4e7a5062015-07-30 16:27:51 -0700309 // void (const RsExpandKernelDriverInfoPfxTy *p, uint32_t x1, uint32_t x2, uint32_t outstep)
310 ExpandedForEachType = llvm::FunctionType::get(VoidTy,
311 {RsExpandKernelDriverInfoPfxPtrTy, Int32Ty, Int32Ty, Int32Ty}, false);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700312
Matt Wala4e7a5062015-07-30 16:27:51 -0700313 // void (void *inBuf, void *outBuf, uint32_t len)
314 ExpandedReduceType = llvm::FunctionType::get(VoidTy, {VoidPtrTy, VoidPtrTy, Int32Ty}, false);
Tobias Grosser8ae46072013-06-20 14:00:31 -0700315 }
316
Matt Wala4e7a5062015-07-30 16:27:51 -0700317 /// @brief Create skeleton of the expanded foreach kernel.
Tobias Grosser357b5862013-06-20 14:12:46 -0700318 ///
319 /// This creates a function with the following signature:
320 ///
/// void (const RsExpandKernelDriverInfoPfx *p, uint32_t x1, uint32_t x2,
Chris Wailes5010f642014-07-25 15:31:32 -0700322 /// uint32_t outstep)
Tobias Grosser357b5862013-06-20 14:12:46 -0700323 ///
Matt Wala4e7a5062015-07-30 16:27:51 -0700324 llvm::Function *createEmptyExpandedForEachKernel(llvm::StringRef OldName) {
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700325 llvm::Function *ExpandedFunction =
Matt Wala4e7a5062015-07-30 16:27:51 -0700326 llvm::Function::Create(ExpandedForEachType,
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700327 llvm::GlobalValue::ExternalLinkage,
328 OldName + ".expand", Module);
Matt Wala4e7a5062015-07-30 16:27:51 -0700329 bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700330 llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700331 (AI++)->setName("p");
332 (AI++)->setName("x1");
333 (AI++)->setName("x2");
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700334 (AI++)->setName("arg_outstep");
Matt Wala4e7a5062015-07-30 16:27:51 -0700335 llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
336 ExpandedFunction);
337 llvm::IRBuilder<> Builder(Begin);
338 Builder.CreateRetVoid();
339 return ExpandedFunction;
340 }
341
342 // Create skeleton of the expanded reduce kernel.
343 //
344 // This creates a function with the following signature:
345 //
346 // void @func.expand(i8* nocapture %inBuf, i8* nocapture %outBuf, i32 len)
347 //
348 llvm::Function *createEmptyExpandedReduceKernel(llvm::StringRef OldName) {
349 llvm::Function *ExpandedFunction =
350 llvm::Function::Create(ExpandedReduceType,
351 llvm::GlobalValue::ExternalLinkage,
352 OldName + ".expand", Module);
353 bccAssert(ExpandedFunction->arg_size() == kNumExpandedReduceParams);
354
355 llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
356
357 using llvm::Attribute;
358
359 llvm::Argument *InBuf = &(*AI++);
360 InBuf->setName("inBuf");
Stephen Hinesdfde70a2015-08-18 21:26:46 -0700361 InBuf->addAttr(llvm::AttributeSet::get(*Context, InBuf->getArgNo() + 1, llvm::makeArrayRef(Attribute::NoCapture)));
Matt Wala4e7a5062015-07-30 16:27:51 -0700362
363 llvm::Argument *OutBuf = &(*AI++);
364 OutBuf->setName("outBuf");
Stephen Hinesdfde70a2015-08-18 21:26:46 -0700365 OutBuf->addAttr(llvm::AttributeSet::get(*Context, OutBuf->getArgNo() + 1, llvm::makeArrayRef(Attribute::NoCapture)));
Matt Wala4e7a5062015-07-30 16:27:51 -0700366
367 (AI++)->setName("len");
Tobias Grosser802f6592013-06-20 14:27:16 -0700368
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700369 llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
370 ExpandedFunction);
Tobias Grosser806075b2013-06-20 17:08:35 -0700371 llvm::IRBuilder<> Builder(Begin);
372 Builder.CreateRetVoid();
373
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700374 return ExpandedFunction;
Tobias Grosser357b5862013-06-20 14:12:46 -0700375 }
376
David Grosse32af522016-01-15 12:15:48 -0800377 // Create skeleton of a general reduce kernel's expanded accumulator.
378 //
379 // This creates a function with the following signature:
380 //
381 // void @func.expand(%RsExpandKernelDriverInfoPfx* nocapture %p,
382 // i32 %x1, i32 %x2, accumType* nocapture %accum)
383 //
384 llvm::Function *createEmptyExpandedReduceNewAccumulator(llvm::StringRef OldName,
385 llvm::Type *AccumArgTy) {
386 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
387 llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
388 llvm::FunctionType *ExpandedReduceNewAccumulatorType =
389 llvm::FunctionType::get(VoidTy,
390 {RsExpandKernelDriverInfoPfxTy->getPointerTo(),
391 Int32Ty, Int32Ty, AccumArgTy}, false);
392 llvm::Function *FnExpandedAccumulator =
393 llvm::Function::Create(ExpandedReduceNewAccumulatorType,
394 llvm::GlobalValue::ExternalLinkage,
395 OldName + ".expand", Module);
396 bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceNewAccumulatorParams);
397
398 llvm::Function::arg_iterator AI = FnExpandedAccumulator->arg_begin();
399
400 using llvm::Attribute;
401
402 llvm::Argument *Arg_p = &(*AI++);
403 Arg_p->setName("p");
404 Arg_p->addAttr(llvm::AttributeSet::get(*Context, Arg_p->getArgNo() + 1,
405 llvm::makeArrayRef(Attribute::NoCapture)));
406
407 llvm::Argument *Arg_x1 = &(*AI++);
408 Arg_x1->setName("x1");
409
410 llvm::Argument *Arg_x2 = &(*AI++);
411 Arg_x2->setName("x2");
412
413 llvm::Argument *Arg_accum = &(*AI++);
414 Arg_accum->setName("accum");
415 Arg_accum->addAttr(llvm::AttributeSet::get(*Context, Arg_accum->getArgNo() + 1,
416 llvm::makeArrayRef(Attribute::NoCapture)));
417
418 llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
419 FnExpandedAccumulator);
420 llvm::IRBuilder<> Builder(Begin);
421 Builder.CreateRetVoid();
422
423 return FnExpandedAccumulator;
424 }
425
Tobias Grossere4a73f62013-06-21 15:35:03 -0700426 /// @brief Create an empty loop
427 ///
428 /// Create a loop of the form:
429 ///
430 /// for (i = LowerBound; i < UpperBound; i++)
431 /// ;
432 ///
433 /// After the loop has been created, the builder is set such that
434 /// instructions can be added to the loop body.
435 ///
436 /// @param Builder The builder to use to build this loop. The current
437 /// position of the builder is the position the loop
438 /// will be inserted.
439 /// @param LowerBound The first value of the loop iterator
/// @param UpperBound The exclusive upper bound of the loop iterator
///                   (the loop runs while i < UpperBound)
/// @param LoopIV     Pointer to a PHINode* that is set to the loop iterator.
442 /// @return The BasicBlock that will be executed after the loop.
443 llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
444 llvm::Value *LowerBound,
445 llvm::Value *UpperBound,
446 llvm::PHINode **LoopIV) {
David Grossc2ca7422015-05-29 14:54:33 -0700447 bccAssert(LowerBound->getType() == UpperBound->getType());
Tobias Grossere4a73f62013-06-21 15:35:03 -0700448
449 llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
450 llvm::Value *Cond, *IVNext;
451 llvm::PHINode *IV;
452
453 CondBB = Builder.GetInsertBlock();
Stephen Hines1bd9f622015-03-18 14:53:10 -0700454 AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), nullptr, nullptr);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700455 HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
Tobias Grossere4a73f62013-06-21 15:35:03 -0700456
457 // if (LowerBound < Upperbound)
458 // goto LoopHeader
459 // else
460 // goto AfterBB
461 CondBB->getTerminator()->eraseFromParent();
462 Builder.SetInsertPoint(CondBB);
Tobias Grossere87a0512013-06-25 15:31:11 -0700463 Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
Tobias Grossere4a73f62013-06-21 15:35:03 -0700464 Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
465
466 // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
467 // iv.next = iv + 1
468 // if (iv.next < Upperbound)
469 // goto LoopHeader
470 // else
471 // goto AfterBB
472 Builder.SetInsertPoint(HeaderBB);
473 IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
474 IV->addIncoming(LowerBound, CondBB);
475 IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
476 IV->addIncoming(IVNext, HeaderBB);
Tobias Grossere87a0512013-06-25 15:31:11 -0700477 Cond = Builder.CreateICmpULT(IVNext, UpperBound);
Tobias Grossere4a73f62013-06-21 15:35:03 -0700478 Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
479 AfterBB->setName("Exit");
480 Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
481 *LoopIV = IV;
482 return AfterBB;
483 }
484
David Gross28c17992015-07-07 16:44:33 -0700485 // Finish building the outgoing argument list for calling a ForEach-able function.
486 //
487 // ArgVector - on input, the non-special arguments
488 // on output, the non-special arguments combined with the special arguments
489 // from SpecialArgVector
490 // SpecialArgVector - special arguments (from ExpandSpecialArguments())
491 // SpecialArgContextIdx - return value of ExpandSpecialArguments()
492 // (position of context argument in SpecialArgVector)
493 // CalleeFunction - the ForEach-able function being called
494 // Builder - for inserting code into the caller function
495 template<unsigned int ArgVectorLen, unsigned int SpecialArgVectorLen>
496 void finishArgList( llvm::SmallVector<llvm::Value *, ArgVectorLen> &ArgVector,
497 const llvm::SmallVector<llvm::Value *, SpecialArgVectorLen> &SpecialArgVector,
498 const int SpecialArgContextIdx,
499 const llvm::Function &CalleeFunction,
500 llvm::IRBuilder<> &CallerBuilder) {
501 /* The context argument (if any) is a pointer to an opaque user-visible type that differs from
502 * the RsExpandKernelDriverInfoPfx type used in the function we are generating (although the
503 * two types represent the same thing). Therefore, we must introduce a pointer cast when
504 * generating a call to the kernel function.
505 */
506 const int ArgContextIdx =
507 SpecialArgContextIdx >= 0 ? (ArgVector.size() + SpecialArgContextIdx) : SpecialArgContextIdx;
508 ArgVector.append(SpecialArgVector.begin(), SpecialArgVector.end());
509 if (ArgContextIdx >= 0) {
510 llvm::Type *ContextArgType = nullptr;
511 int ArgIdx = ArgContextIdx;
512 for (const auto &Arg : CalleeFunction.getArgumentList()) {
513 if (!ArgIdx--) {
514 ContextArgType = Arg.getType();
515 break;
516 }
517 }
518 bccAssert(ContextArgType);
519 ArgVector[ArgContextIdx] = CallerBuilder.CreatePointerCast(ArgVector[ArgContextIdx], ContextArgType);
520 }
521 }
522
Matt Wala083ef3c2015-07-22 18:58:05 -0700523 // GEPHelper() returns a SmallVector of values suitable for passing
524 // to IRBuilder::CreateGEP(), and SmallGEPIndices is a typedef for
525 // the returned data type. It is sized so that the SmallVector
526 // returned by GEPHelper() never needs to do a heap allocation for
527 // any list of GEP indices it encounters in the code.
528 typedef llvm::SmallVector<llvm::Value *, 3> SmallGEPIndices;
529
530 // Helper for turning a list of constant integer GEP indices into a
531 // SmallVector of llvm::Value*. The return value is suitable for
532 // passing to a GetElementPtrInst constructor or IRBuilder::CreateGEP().
533 //
534 // Inputs:
535 // I32Args should be integers which represent the index arguments
536 // to a GEP instruction.
537 //
538 // Returns:
539 // Returns a SmallVector of ConstantInts.
Matt Wala4e7a5062015-07-30 16:27:51 -0700540 SmallGEPIndices GEPHelper(const std::initializer_list<int32_t> I32Args) {
Matt Wala083ef3c2015-07-22 18:58:05 -0700541 SmallGEPIndices Out(I32Args.size());
542 llvm::IntegerType *I32Ty = llvm::Type::getInt32Ty(*Context);
543 std::transform(I32Args.begin(), I32Args.end(), Out.begin(),
544 [I32Ty](int32_t Arg) { return llvm::ConstantInt::get(I32Ty, Arg); });
545 return Out;
546 }
547
Tobias Grosser8ae46072013-06-20 14:00:31 -0700548public:
Matt Wala4e7a5062015-07-30 16:27:51 -0700549 RSKernelExpandPass(bool pEnableStepOpt = true)
Chris Wailes900c6c12014-08-13 15:40:00 -0700550 : ModulePass(ID), Module(nullptr), Context(nullptr),
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700551 mEnableStepOpt(pEnableStepOpt) {
552
Tobias Grosser8ae46072013-06-20 14:00:31 -0700553 }
554
Stephen Hinesc754d492015-01-08 16:00:50 -0800555 virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
556 // This pass does not use any other analysis passes, but it does
557 // add/wrap the existing functions in the module (thus altering the CFG).
558 }
559
David Gross33cda5c2015-01-30 11:41:19 -0800560 // Build contribution to outgoing argument list for calling a
David Grosse32af522016-01-15 12:15:48 -0800561 // ForEach-able function or a general reduction accumulator
562 // function, based on the special parameters of that function.
David Gross33cda5c2015-01-30 11:41:19 -0800563 //
David Grosse32af522016-01-15 12:15:48 -0800564 // Signature - metadata bits for the signature of the callee
David Gross33cda5c2015-01-30 11:41:19 -0800565 // X, Arg_p - values derived directly from expanded function,
David Grosse32af522016-01-15 12:15:48 -0800566 // suitable for computing arguments for the callee
David Gross33cda5c2015-01-30 11:41:19 -0800567 // CalleeArgs - contribution is accumulated here
568 // Bump - invoked once for each contributed outgoing argument
Matt Wala083ef3c2015-07-22 18:58:05 -0700569 // LoopHeaderInsertionPoint - an Instruction in the loop header, before which
570 // this function can insert loop-invariant loads
David Gross28c17992015-07-07 16:44:33 -0700571 //
572 // Return value is the (zero-based) position of the context (Arg_p)
573 // argument in the CalleeArgs vector, or a negative value if the
574 // context argument is not placed in the CalleeArgs vector.
575 int ExpandSpecialArguments(uint32_t Signature,
576 llvm::Value *X,
577 llvm::Value *Arg_p,
578 llvm::IRBuilder<> &Builder,
579 llvm::SmallVector<llvm::Value*, 8> &CalleeArgs,
Matt Wala083ef3c2015-07-22 18:58:05 -0700580 std::function<void ()> Bump,
581 llvm::Instruction *LoopHeaderInsertionPoint) {
David Gross33cda5c2015-01-30 11:41:19 -0800582
David Gross28c17992015-07-07 16:44:33 -0700583 bccAssert(CalleeArgs.empty());
584
585 int Return = -1;
David Gross33cda5c2015-01-30 11:41:19 -0800586 if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) {
587 CalleeArgs.push_back(Arg_p);
588 Bump();
David Gross28c17992015-07-07 16:44:33 -0700589 Return = CalleeArgs.size() - 1;
David Gross33cda5c2015-01-30 11:41:19 -0800590 }
591
592 if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
593 CalleeArgs.push_back(X);
594 Bump();
595 }
596
David Grosse44a3522015-03-13 15:24:27 -0700597 if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) ||
598 bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
Matt Wala083ef3c2015-07-22 18:58:05 -0700599 bccAssert(LoopHeaderInsertionPoint);
David Gross33cda5c2015-01-30 11:41:19 -0800600
Matt Wala083ef3c2015-07-22 18:58:05 -0700601 // Y and Z are loop invariant, so they can be hoisted out of the
602 // loop. Set the IRBuilder insertion point to the loop header.
603 auto OldInsertionPoint = Builder.saveIP();
604 Builder.SetInsertPoint(LoopHeaderInsertionPoint);
David Grosse44a3522015-03-13 15:24:27 -0700605
606 if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
Matt Wala083ef3c2015-07-22 18:58:05 -0700607 SmallGEPIndices YValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
608 RsLaunchDimensionsFieldY}));
609 llvm::Value *YAddr = Builder.CreateInBoundsGEP(Arg_p, YValueGEP, "Y.gep");
610 CalleeArgs.push_back(Builder.CreateLoad(YAddr, "Y"));
David Grosse44a3522015-03-13 15:24:27 -0700611 Bump();
612 }
613
614 if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
Matt Wala083ef3c2015-07-22 18:58:05 -0700615 SmallGEPIndices ZValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
616 RsLaunchDimensionsFieldZ}));
617 llvm::Value *ZAddr = Builder.CreateInBoundsGEP(Arg_p, ZValueGEP, "Z.gep");
618 CalleeArgs.push_back(Builder.CreateLoad(ZAddr, "Z"));
David Grosse44a3522015-03-13 15:24:27 -0700619 Bump();
620 }
Matt Wala083ef3c2015-07-22 18:58:05 -0700621
622 Builder.restoreIP(OldInsertionPoint);
David Gross33cda5c2015-01-30 11:41:19 -0800623 }
David Gross28c17992015-07-07 16:44:33 -0700624
625 return Return;
David Gross33cda5c2015-01-30 11:41:19 -0800626 }
627
David Grosse32af522016-01-15 12:15:48 -0800628 // Generate loop-invariant input processing setup code for an expanded
629 // ForEach-able function or an expanded general reduction accumulator
630 // function.
631 //
632 // LoopHeader - block at the end of which the setup code will be inserted
633 // Arg_p - RSKernelDriverInfo pointer passed to the expanded function
634 // TBAAPointer - metadata for marking loads of pointer values out of RSKernelDriverInfo
635 // ArgIter - iterator pointing to first input of the UNexpanded function
636 // NumInputs - number of inputs (NOT number of ARGUMENTS)
637 //
638 // InBufPtrs[] - this function sets each array element to point to the first
639 // cell of the corresponding input allocation
640 // InStructTempSlots[] - this function sets each array element either to nullptr
641 // or to the result of an alloca (for the case where the
642 // calling convention dictates that a value must be passed
643 // by reference, and so we need a stacked temporary to hold
644 // a copy of that value)
645 void ExpandInputsLoopInvariant(llvm::IRBuilder<> &Builder, llvm::BasicBlock *LoopHeader,
646 llvm::Value *Arg_p,
647 llvm::MDNode *TBAAPointer,
648 llvm::Function::arg_iterator ArgIter,
649 const size_t NumInputs,
650 llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
651 llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots) {
652 bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT);
653
654 // Extract information about input slots. The work done
655 // here is loop-invariant, so we can hoist the operations out of the loop.
656 auto OldInsertionPoint = Builder.saveIP();
657 Builder.SetInsertPoint(LoopHeader->getTerminator());
658
659 for (size_t InputIndex = 0; InputIndex < NumInputs; ++InputIndex, ArgIter++) {
660 llvm::Type *InType = ArgIter->getType();
661
662 /*
663 * AArch64 calling conventions dictate that structs of sufficient size
664 * get passed by pointer instead of passed by value. This, combined
665 * with the fact that we don't allow kernels to operate on pointer
666 * data means that if we see a kernel with a pointer parameter we know
667 * that it is a struct input that has been promoted. As such we don't
668 * need to convert its type to a pointer. Later we will need to know
669 * to create a temporary copy on the stack, so we save this information
670 * in InStructTempSlots.
671 */
672 if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) {
673 llvm::Type *ElementType = PtrType->getElementType();
674 InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr,
675 "input_struct_slot"));
676 } else {
677 InType = InType->getPointerTo();
678 InStructTempSlots.push_back(nullptr);
679 }
680
681 SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr,
682 static_cast<int32_t>(InputIndex)}));
683 llvm::Value *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep");
684 llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf");
685 llvm::Value *CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in");
686
687 if (gEnableRsTbaa) {
688 InBufPtr->setMetadata("tbaa", TBAAPointer);
689 }
690
691 InBufPtrs.push_back(CastInBufPtr);
692 }
693
694 Builder.restoreIP(OldInsertionPoint);
695 }
696
697 // Generate loop-varying input processing code for an expanded ForEach-able function
698 // or an expanded general reduction accumulator function. Also, for the call to the
699 // UNexpanded function, collect the portion of the argument list corresponding to the
700 // inputs.
701 //
702 // Arg_x1 - first X coordinate to be processed by the expanded function
703 // TBAAAllocation - metadata for marking loads of input values out of allocations
704 // NumInputs -- number of inputs (NOT number of ARGUMENTS)
705 // InBufPtrs[] - this function consumes the information produced by ExpandInputsLoopInvariant()
706 // InStructTempSlots[] - this function consumes the information produced by ExpandInputsLoopInvariant()
707 // IndVar - value of loop induction variable (X coordinate) for a given loop iteration
708 //
709 // RootArgs - this function sets this to the list of outgoing argument values corresponding
710 // to the inputs
711 void ExpandInputsBody(llvm::IRBuilder<> &Builder,
712 llvm::Value *Arg_x1,
713 llvm::MDNode *TBAAAllocation,
714 const size_t NumInputs,
715 const llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
716 const llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots,
717 llvm::Value *IndVar,
718 llvm::SmallVectorImpl<llvm::Value *> &RootArgs) {
719 llvm::Value *Offset = Builder.CreateSub(IndVar, Arg_x1);
720
721 for (size_t Index = 0; Index < NumInputs; ++Index) {
722 llvm::Value *InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset);
723 llvm::Value *Input;
724
725 llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
726
727 if (gEnableRsTbaa) {
728 InputLoad->setMetadata("tbaa", TBAAAllocation);
729 }
730
731 if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) {
732 // Pass a pointer to a temporary on the stack, rather than
733 // passing a pointer to the original value. We do not want
734 // the kernel to potentially modify the input data.
735
736 // Note: don't annotate with TBAA, since the kernel might
737 // have its own TBAA annotations for the pointer argument.
738 Builder.CreateStore(InputLoad, TemporarySlot);
739 Input = TemporarySlot;
740 } else {
741 Input = InputLoad;
742 }
743
744 RootArgs.push_back(Input);
745 }
746 }
747
Tobias Grosser8ae46072013-06-20 14:00:31 -0700748 /* Performs the actual optimization on a selected function. On success, the
749 * Module will contain a new function of the name "<NAME>.expand" that
750 * invokes <NAME>() in a loop with the appropriate parameters.
751 */
Matt Wala4e7a5062015-07-30 16:27:51 -0700752 bool ExpandOldStyleForEach(llvm::Function *Function, uint32_t Signature) {
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700753 ALOGV("Expanding ForEach-able Function %s",
754 Function->getName().str().c_str());
Tobias Grosser8ae46072013-06-20 14:00:31 -0700755
756 if (!Signature) {
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700757 Signature = getRootSignature(Function);
Tobias Grosser8ae46072013-06-20 14:00:31 -0700758 if (!Signature) {
759 // We couldn't determine how to expand this function based on its
760 // function signature.
761 return false;
762 }
763 }
764
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700765 llvm::DataLayout DL(Module);
Tobias Grosser8ae46072013-06-20 14:00:31 -0700766
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700767 llvm::Function *ExpandedFunction =
Matt Wala4e7a5062015-07-30 16:27:51 -0700768 createEmptyExpandedForEachKernel(Function->getName());
Stephen Hinesdb169182012-01-05 18:46:36 -0800769
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700770 /*
771 * Extract the expanded function's parameters. It is guaranteed by
David Grosse32af522016-01-15 12:15:48 -0800772 * createEmptyExpandedForEachKernel that there will be four parameters.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700773 */
David Gross33cda5c2015-01-30 11:41:19 -0800774
Matt Wala4e7a5062015-07-30 16:27:51 -0700775 bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
David Gross33cda5c2015-01-30 11:41:19 -0800776
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700777 llvm::Function::arg_iterator ExpandedFunctionArgIter =
778 ExpandedFunction->arg_begin();
Stephen Hinesdb169182012-01-05 18:46:36 -0800779
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700780 llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++);
781 llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++);
782 llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++);
Chris Wailes5010f642014-07-25 15:31:32 -0700783 llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700784
Chris Wailes900c6c12014-08-13 15:40:00 -0700785 llvm::Value *InStep = nullptr;
786 llvm::Value *OutStep = nullptr;
Stephen Hinesdb169182012-01-05 18:46:36 -0800787
788 // Construct the actual function body.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700789 llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
Stephen Hinesdb169182012-01-05 18:46:36 -0800790
Stephen Hinescc366e52012-02-21 17:22:04 -0800791 // Collect and construct the arguments for the kernel().
Stephen Hinesdb169182012-01-05 18:46:36 -0800792 // Note that we load any loop-invariant arguments before entering the Loop.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700793 llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
Stephen Hinesdb169182012-01-05 18:46:36 -0800794
Chris Wailes900c6c12014-08-13 15:40:00 -0700795 llvm::Type *InTy = nullptr;
Matt Wala083ef3c2015-07-22 18:58:05 -0700796 llvm::Value *InBufPtr = nullptr;
Stephen Hinesd8817752013-08-02 17:56:51 -0700797 if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
Matt Wala083ef3c2015-07-22 18:58:05 -0700798 SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 0}));
799 llvm::LoadInst *InStepArg = Builder.CreateLoad(
800 Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"), "instep_addr");
Chris Wailese10b8642014-07-15 13:18:45 -0700801
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700802 InTy = (FunctionArgIter++)->getType();
Chris Wailese10b8642014-07-15 13:18:45 -0700803 InStep = getStepValue(&DL, InTy, InStepArg);
804
Stephen Hines2b040862012-07-27 20:18:08 -0700805 InStep->setName("instep");
Chris Wailese10b8642014-07-15 13:18:45 -0700806
Matt Wala083ef3c2015-07-22 18:58:05 -0700807 SmallGEPIndices InputAddrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 0}));
808 InBufPtr = Builder.CreateLoad(
809 Builder.CreateInBoundsGEP(Arg_p, InputAddrGEP, "input_buf.gep"), "input_buf");
Stephen Hinesdb169182012-01-05 18:46:36 -0800810 }
811
Chris Wailes900c6c12014-08-13 15:40:00 -0700812 llvm::Type *OutTy = nullptr;
813 llvm::Value *OutBasePtr = nullptr;
Stephen Hinesd8817752013-08-02 17:56:51 -0700814 if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700815 OutTy = (FunctionArgIter++)->getType();
Stephen Hinesb730e232013-01-09 15:31:36 -0800816 OutStep = getStepValue(&DL, OutTy, Arg_outstep);
Stephen Hines2b040862012-07-27 20:18:08 -0700817 OutStep->setName("outstep");
Matt Wala083ef3c2015-07-22 18:58:05 -0700818 SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
819 OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
Stephen Hinesdb169182012-01-05 18:46:36 -0800820 }
821
Chris Wailes900c6c12014-08-13 15:40:00 -0700822 llvm::Value *UsrData = nullptr;
Stephen Hinesd8817752013-08-02 17:56:51 -0700823 if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700824 llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
Matt Wala083ef3c2015-07-22 18:58:05 -0700825 llvm::Value *UsrDataPointerAddr = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldUsr);
826 UsrData = Builder.CreatePointerCast(Builder.CreateLoad(UsrDataPointerAddr), UsrDataTy);
Stephen Hinesdb169182012-01-05 18:46:36 -0800827 UsrData->setName("UsrData");
Stephen Hinesdb169182012-01-05 18:46:36 -0800828 }
829
Matt Wala083ef3c2015-07-22 18:58:05 -0700830 llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
Tobias Grossere4a73f62013-06-21 15:35:03 -0700831 llvm::PHINode *IV;
832 createLoop(Builder, Arg_x1, Arg_x2, &IV);
Stephen Hinesdb169182012-01-05 18:46:36 -0800833
David Gross33cda5c2015-01-30 11:41:19 -0800834 llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
David Gross28c17992015-07-07 16:44:33 -0700835 const int CalleeArgsContextIdx = ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
Matt Wala083ef3c2015-07-22 18:58:05 -0700836 [&FunctionArgIter]() { FunctionArgIter++; },
837 LoopHeader->getTerminator());
David Gross33cda5c2015-01-30 11:41:19 -0800838
839 bccAssert(FunctionArgIter == Function->arg_end());
840
Stephen Hinescc366e52012-02-21 17:22:04 -0800841 // Populate the actual call to kernel().
Stephen Hinesdb169182012-01-05 18:46:36 -0800842 llvm::SmallVector<llvm::Value*, 8> RootArgs;
843
Chris Wailes900c6c12014-08-13 15:40:00 -0700844 llvm::Value *InPtr = nullptr;
845 llvm::Value *OutPtr = nullptr;
Stephen Hinesdb169182012-01-05 18:46:36 -0800846
Tobias Grosserae937ec2013-06-27 13:49:47 -0700847 // Calculate the current input and output pointers
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700848 //
Tobias Grosserae937ec2013-06-27 13:49:47 -0700849 // We always calculate the input/output pointers with a GEP operating on i8
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700850 // values and only cast at the very end to OutTy. This is because the step
851 // between two values is given in bytes.
852 //
853 // TODO: We could further optimize the output by using a GEP operation of
854 // type 'OutTy' in cases where the element type of the allocation allows.
855 if (OutBasePtr) {
856 llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
857 OutOffset = Builder.CreateMul(OutOffset, OutStep);
Matt Wala083ef3c2015-07-22 18:58:05 -0700858 OutPtr = Builder.CreateInBoundsGEP(OutBasePtr, OutOffset);
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700859 OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
860 }
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700861
Matt Wala083ef3c2015-07-22 18:58:05 -0700862 if (InBufPtr) {
Tobias Grosserae937ec2013-06-27 13:49:47 -0700863 llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
864 InOffset = Builder.CreateMul(InOffset, InStep);
Matt Wala083ef3c2015-07-22 18:58:05 -0700865 InPtr = Builder.CreateInBoundsGEP(InBufPtr, InOffset);
Tobias Grosserae937ec2013-06-27 13:49:47 -0700866 InPtr = Builder.CreatePointerCast(InPtr, InTy);
867 }
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700868
Tobias Grosserae937ec2013-06-27 13:49:47 -0700869 if (InPtr) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700870 RootArgs.push_back(InPtr);
Stephen Hinesdb169182012-01-05 18:46:36 -0800871 }
872
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700873 if (OutPtr) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700874 RootArgs.push_back(OutPtr);
Stephen Hinesdb169182012-01-05 18:46:36 -0800875 }
876
877 if (UsrData) {
878 RootArgs.push_back(UsrData);
879 }
880
David Gross28c17992015-07-07 16:44:33 -0700881 finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
Stephen Hinesdb169182012-01-05 18:46:36 -0800882
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700883 Builder.CreateCall(Function, RootArgs);
Stephen Hinesdb169182012-01-05 18:46:36 -0800884
Stephen Hines7ae3a822012-09-14 19:24:58 -0700885 return true;
886 }
887
Matt Wala4e7a5062015-07-30 16:27:51 -0700888 /* Expand a pass-by-value foreach kernel.
Stephen Hines7ae3a822012-09-14 19:24:58 -0700889 */
Matt Wala4e7a5062015-07-30 16:27:51 -0700890 bool ExpandForEach(llvm::Function *Function, uint32_t Signature) {
Stephen Hinesd8817752013-08-02 17:56:51 -0700891 bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700892 ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
Stephen Hines7ae3a822012-09-14 19:24:58 -0700893
Matt Wala4e7a5062015-07-30 16:27:51 -0700894 // TODO: Refactor this to share functionality with ExpandOldStyleForEach.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700895 llvm::DataLayout DL(Module);
Stephen Hines7ae3a822012-09-14 19:24:58 -0700896
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700897 llvm::Function *ExpandedFunction =
Matt Wala4e7a5062015-07-30 16:27:51 -0700898 createEmptyExpandedForEachKernel(Function->getName());
Stephen Hines7ae3a822012-09-14 19:24:58 -0700899
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700900 /*
901 * Extract the expanded function's parameters. It is guaranteed by
David Grosse32af522016-01-15 12:15:48 -0800902 * createEmptyExpandedForEachKernel that there will be four parameters.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700903 */
Chris Wailes881cda42014-06-23 11:27:41 -0700904
Matt Wala4e7a5062015-07-30 16:27:51 -0700905 bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
Chris Wailes881cda42014-06-23 11:27:41 -0700906
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700907 llvm::Function::arg_iterator ExpandedFunctionArgIter =
908 ExpandedFunction->arg_begin();
909
910 llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++);
911 llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++);
912 llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++);
Matt Wala3bc475b2015-08-12 17:56:19 -0700913 // Arg_outstep is not used by expanded new-style forEach kernels.
Stephen Hines7ae3a822012-09-14 19:24:58 -0700914
Stephen Hines7ae3a822012-09-14 19:24:58 -0700915 // Construct the actual function body.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700916 llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
Stephen Hines7ae3a822012-09-14 19:24:58 -0700917
Tobias Grosser18a38a32013-07-26 15:03:03 -0700918 // Create TBAA meta-data.
Stephen Hines354d1c12015-04-03 22:54:54 -0700919 llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
920 *TBAAAllocation, *TBAAPointer;
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700921 llvm::MDBuilder MDHelper(*Context);
Logan Chien14588cf2014-02-20 12:35:51 +0800922
Stephen Hines354d1c12015-04-03 22:54:54 -0700923 TBAARenderScriptDistinct =
Matt Wala4e7a5062015-07-30 16:27:51 -0700924 MDHelper.createTBAARoot(kRenderScriptTBAARootName);
925 TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
Stephen Hines354d1c12015-04-03 22:54:54 -0700926 TBAARenderScriptDistinct);
Chris Wailese10b8642014-07-15 13:18:45 -0700927 TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
928 TBAARenderScript);
929 TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
930 TBAAAllocation, 0);
931 TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
932 TBAARenderScript);
Logan Chien14588cf2014-02-20 12:35:51 +0800933 TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
Tobias Grosser18a38a32013-07-26 15:03:03 -0700934
Chris Wailes881cda42014-06-23 11:27:41 -0700935 /*
936 * Collect and construct the arguments for the kernel().
937 *
938 * Note that we load any loop-invariant arguments before entering the Loop.
939 */
Matt Wala083ef3c2015-07-22 18:58:05 -0700940 size_t NumRemainingInputs = Function->arg_size();
Stephen Hines7ae3a822012-09-14 19:24:58 -0700941
Chris Wailes881cda42014-06-23 11:27:41 -0700942 // No usrData parameter on kernels.
943 bccAssert(
944 !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
945
946 llvm::Function::arg_iterator ArgIter = Function->arg_begin();
947
948 // Check the return type
Tim Murraybb73b742014-11-04 11:20:10 -0800949 llvm::Type *OutTy = nullptr;
Tim Murraybb73b742014-11-04 11:20:10 -0800950 llvm::LoadInst *OutBasePtr = nullptr;
951 llvm::Value *CastedOutBasePtr = nullptr;
Chris Wailes881cda42014-06-23 11:27:41 -0700952
Chris Wailese10b8642014-07-15 13:18:45 -0700953 bool PassOutByPointer = false;
Chris Wailes881cda42014-06-23 11:27:41 -0700954
Stephen Hinesd8817752013-08-02 17:56:51 -0700955 if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700956 llvm::Type *OutBaseTy = Function->getReturnType();
Chris Wailes881cda42014-06-23 11:27:41 -0700957
Stephen Hines74a4b082012-09-21 19:26:48 -0700958 if (OutBaseTy->isVoidTy()) {
Chris Wailese10b8642014-07-15 13:18:45 -0700959 PassOutByPointer = true;
Chris Wailes881cda42014-06-23 11:27:41 -0700960 OutTy = ArgIter->getType();
961
962 ArgIter++;
Matt Wala083ef3c2015-07-22 18:58:05 -0700963 --NumRemainingInputs;
Stephen Hines74a4b082012-09-21 19:26:48 -0700964 } else {
Stephen Hines74a4b082012-09-21 19:26:48 -0700965 // We don't increment Args, since we are using the actual return type.
Chris Wailes881cda42014-06-23 11:27:41 -0700966 OutTy = OutBaseTy->getPointerTo();
Stephen Hines74a4b082012-09-21 19:26:48 -0700967 }
Chris Wailes881cda42014-06-23 11:27:41 -0700968
Matt Wala083ef3c2015-07-22 18:58:05 -0700969 SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
970 OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
Chris Wailes097ca142014-07-08 15:57:12 -0700971
Stephen Hines9c5263e2014-02-11 15:58:48 -0800972 if (gEnableRsTbaa) {
973 OutBasePtr->setMetadata("tbaa", TBAAPointer);
974 }
Tim Murray50f5eb42014-12-09 17:36:24 -0800975
Tim Murraybb73b742014-11-04 11:20:10 -0800976 CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
Stephen Hines74a4b082012-09-21 19:26:48 -0700977 }
978
Matt Wala083ef3c2015-07-22 18:58:05 -0700979 llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
980 llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
981
982 bccAssert(NumRemainingInputs <= RS_KERNEL_INPUT_LIMIT);
983
984 // Create the loop structure.
985 llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
David Gross33cda5c2015-01-30 11:41:19 -0800986 llvm::PHINode *IV;
987 createLoop(Builder, Arg_x1, Arg_x2, &IV);
988
989 llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
Matt Wala083ef3c2015-07-22 18:58:05 -0700990 const int CalleeArgsContextIdx =
991 ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
992 [&NumRemainingInputs]() { --NumRemainingInputs; },
993 LoopHeader->getTerminator());
David Gross33cda5c2015-01-30 11:41:19 -0800994
Matt Wala083ef3c2015-07-22 18:58:05 -0700995 // After ExpandSpecialArguments() gets called, NumRemainingInputs
996 // counts the number of arguments to the kernel that correspond to
997 // an array entry from the InPtr field of the DriverInfo
998 // structure.
999 const size_t NumInPtrArguments = NumRemainingInputs;
Chris Wailes881cda42014-06-23 11:27:41 -07001000
Matt Wala083ef3c2015-07-22 18:58:05 -07001001 if (NumInPtrArguments > 0) {
David Grosse32af522016-01-15 12:15:48 -08001002 ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, ArgIter, NumInPtrArguments,
1003 InBufPtrs, InStructTempSlots);
Stephen Hines7ae3a822012-09-14 19:24:58 -07001004 }
1005
Stephen Hines7ae3a822012-09-14 19:24:58 -07001006 // Populate the actual call to kernel().
1007 llvm::SmallVector<llvm::Value*, 8> RootArgs;
1008
Matt Wala9296edc2015-08-05 16:32:42 -07001009 // Calculate the current input and output pointers.
Chris Wailes881cda42014-06-23 11:27:41 -07001010
1011 // Output
1012
Chris Wailes900c6c12014-08-13 15:40:00 -07001013 llvm::Value *OutPtr = nullptr;
Tim Murraybb73b742014-11-04 11:20:10 -08001014 if (CastedOutBasePtr) {
Tobias Grosser7b662902013-06-21 17:07:39 -07001015 llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
Matt Wala083ef3c2015-07-22 18:58:05 -07001016 OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset);
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001017
Chris Wailese10b8642014-07-15 13:18:45 -07001018 if (PassOutByPointer) {
Chris Wailes881cda42014-06-23 11:27:41 -07001019 RootArgs.push_back(OutPtr);
Stephen Hines9c5263e2014-02-11 15:58:48 -08001020 }
Chris Wailes881cda42014-06-23 11:27:41 -07001021 }
1022
1023 // Inputs
1024
Matt Wala083ef3c2015-07-22 18:58:05 -07001025 if (NumInPtrArguments > 0) {
David Grosse32af522016-01-15 12:15:48 -08001026 ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInPtrArguments,
1027 InBufPtrs, InStructTempSlots, IV, RootArgs);
Stephen Hines7ae3a822012-09-14 19:24:58 -07001028 }
1029
David Gross28c17992015-07-07 16:44:33 -07001030 finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
Stephen Hines7ae3a822012-09-14 19:24:58 -07001031
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001032 llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
Stephen Hines7ae3a822012-09-14 19:24:58 -07001033
Chris Wailese10b8642014-07-15 13:18:45 -07001034 if (OutPtr && !PassOutByPointer) {
Matt Wala9296edc2015-08-05 16:32:42 -07001035 RetVal->setName("call.result");
Tobias Grosser18a38a32013-07-26 15:03:03 -07001036 llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
Stephen Hines9c5263e2014-02-11 15:58:48 -08001037 if (gEnableRsTbaa) {
1038 Store->setMetadata("tbaa", TBAAAllocation);
1039 }
Stephen Hines7ae3a822012-09-14 19:24:58 -07001040 }
1041
Stephen Hinesdb169182012-01-05 18:46:36 -08001042 return true;
1043 }
1044
David Grosse32af522016-01-15 12:15:48 -08001045 // Expand a simple reduce-style kernel function.
Matt Wala4e7a5062015-07-30 16:27:51 -07001046 //
1047 // The input is a kernel which represents a binary operation,
1048 // of the form
1049 //
1050 // define foo @func(foo %a, foo %b),
1051 //
1052 // (More generally, it can be of the forms
1053 //
1054 // define void @func(foo* %ret, foo* %a, foo* %b)
1055 // define void @func(foo* %ret, foo1 %a, foo1 %b)
1056 // define foo1 @func(foo2 %a, foo2 %b)
1057 //
1058 // as a result of argument / return value conversions. Here, "foo1"
1059 // and "foo2" refer to possibly coerced types, and the coerced
1060 // argument type may be different from the coerced return type. See
1061 // "Note on coercion" below.)
1062 //
1063 // Note also, we do not expect to encounter any case when the
1064 // arguments are promoted to pointers but the return value is
1065 // unpromoted to pointer, e.g.
1066 //
1067 // define foo1 @func(foo* %a, foo* %b)
1068 //
1069 // and we will throw an assertion in this case.
1070 //
1071 // The input kernel gets expanded into a kernel of the form
1072 //
1073 // define void @func.expand(i8* %inBuf, i8* outBuf, i32 len)
1074 //
1075 // which performs a serial reduction of `len` elements from `inBuf`,
1076 // and stores the result into `outBuf`. In pseudocode, @func.expand
1077 // does:
1078 //
1079 // inArr := (foo *)inBuf;
1080 // accum := inArr[0];
1081 // for (i := 1; i < len; ++i) {
1082 // accum := foo(accum, inArr[i]);
1083 // }
1084 // *(foo *)outBuf := accum;
1085 //
1086 // Note on coercion
1087 //
1088 // Both the return value and the argument types may undergo internal
1089 // coercion in clang as part of call lowering. As a result, the
1090 // return value type may differ from the argument type even if the
1091 // types in the RenderScript signature are the same. For instance, the
1092 // kernel
1093 //
1094 // int3 add(int3 a, int3 b) { return a + b; }
1095 //
1096 // gets lowered by clang as
1097 //
1098 // define <3 x i32> @add(<4 x i32> %a.coerce, <4 x i32> %b.coerce)
1099 //
1100 // under AArch64. The details of this process are found in clang,
1101 // lib/CodeGen/TargetInfo.cpp, under classifyArgumentType() and
1102 // classifyReturnType() in ARMABIInfo, AArch64ABIInfo. If the value
1103 // is passed by pointer, then the pointed-to type is not coerced.
1104 //
1105 // Since we lack the original type information, this code does loads
1106 // and stores of allocation data by way of pointers to the coerced
1107 // type.
1108 bool ExpandReduce(llvm::Function *Function) {
1109 bccAssert(Function);
1110
David Grosse32af522016-01-15 12:15:48 -08001111 ALOGV("Expanding simple reduce kernel %s", Function->getName().str().c_str());
Matt Wala4e7a5062015-07-30 16:27:51 -07001112
1113 llvm::DataLayout DL(Module);
1114
1115 // TBAA Metadata
1116 llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, *TBAAAllocation;
1117 llvm::MDBuilder MDHelper(*Context);
1118
1119 TBAARenderScriptDistinct =
1120 MDHelper.createTBAARoot(kRenderScriptTBAARootName);
1121 TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
1122 TBAARenderScriptDistinct);
1123 TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
1124 TBAARenderScript);
1125 TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
1126 TBAAAllocation, 0);
1127
1128 llvm::Function *ExpandedFunction =
1129 createEmptyExpandedReduceKernel(Function->getName());
1130
1131 // Extract the expanded kernel's parameters. It is guaranteed by
David Grosse32af522016-01-15 12:15:48 -08001132 // createEmptyExpandedReduceKernel that there will be 3 parameters.
Matt Wala4e7a5062015-07-30 16:27:51 -07001133 auto ExpandedFunctionArgIter = ExpandedFunction->arg_begin();
1134
1135 llvm::Value *Arg_inBuf = &*(ExpandedFunctionArgIter++);
1136 llvm::Value *Arg_outBuf = &*(ExpandedFunctionArgIter++);
1137 llvm::Value *Arg_len = &*(ExpandedFunctionArgIter++);
1138
1139 bccAssert(Function->arg_size() == 2 || Function->arg_size() == 3);
1140
1141 // Check if, instead of returning a value, the original kernel has
1142 // a pointer parameter which points to a temporary buffer into
1143 // which the return value gets written.
1144 const bool ReturnValuePointerStyle = (Function->arg_size() == 3);
1145 bccAssert(Function->getReturnType()->isVoidTy() == ReturnValuePointerStyle);
1146
1147 // Check if, instead of being passed by value, the inputs to the
1148 // original kernel are passed by pointer.
1149 auto FirstArgIter = Function->arg_begin();
1150 // The second argument is always an input to the original kernel.
1151 auto SecondArgIter = std::next(FirstArgIter);
1152 const bool InputsPointerStyle = SecondArgIter->getType()->isPointerTy();
1153
1154 // Get the output type (i.e. return type of the original kernel).
1155 llvm::PointerType *OutPtrTy = nullptr;
1156 llvm::Type *OutTy = nullptr;
1157 if (ReturnValuePointerStyle) {
1158 OutPtrTy = llvm::dyn_cast<llvm::PointerType>(FirstArgIter->getType());
1159 bccAssert(OutPtrTy && "Expected a pointer parameter to kernel");
1160 OutTy = OutPtrTy->getElementType();
1161 } else {
1162 OutTy = Function->getReturnType();
1163 bccAssert(!OutTy->isVoidTy());
1164 OutPtrTy = OutTy->getPointerTo();
1165 }
1166
1167 // Get the input type (type of the arguments to the original
1168 // kernel). Some input types are different from the output type,
1169 // due to explicit coercion that the compiler performs when
1170 // lowering the parameters. See "Note on coercion" above.
1171 llvm::PointerType *InPtrTy;
1172 llvm::Type *InTy;
1173 if (InputsPointerStyle) {
1174 InPtrTy = llvm::dyn_cast<llvm::PointerType>(SecondArgIter->getType());
1175 bccAssert(InPtrTy && "Expected a pointer parameter to kernel");
1176 bccAssert(ReturnValuePointerStyle);
1177 bccAssert(std::next(SecondArgIter)->getType() == InPtrTy &&
1178 "Input type mismatch");
1179 InTy = InPtrTy->getElementType();
1180 } else {
1181 InTy = SecondArgIter->getType();
1182 InPtrTy = InTy->getPointerTo();
1183 if (!ReturnValuePointerStyle) {
1184 bccAssert(InTy == FirstArgIter->getType() && "Input type mismatch");
1185 } else {
1186 bccAssert(InTy == std::next(SecondArgIter)->getType() &&
1187 "Input type mismatch");
1188 }
1189 }
1190
1191 // The input type should take up the same amount of space in
1192 // memory as the output type.
1193 bccAssert(DL.getTypeAllocSize(InTy) == DL.getTypeAllocSize(OutTy));
1194
1195 // Construct the actual function body.
1196 llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
1197
1198 // Cast input and output buffers to appropriate types.
1199 llvm::Value *InBuf = Builder.CreatePointerCast(Arg_inBuf, InPtrTy);
1200 llvm::Value *OutBuf = Builder.CreatePointerCast(Arg_outBuf, OutPtrTy);
1201
1202 // Create a slot to pass temporary results back. This needs to be
1203 // separate from the accumulator slot because the kernel may mark
1204 // the return value slot as noalias.
1205 llvm::Value *ReturnBuf = nullptr;
1206 if (ReturnValuePointerStyle) {
1207 ReturnBuf = Builder.CreateAlloca(OutTy, nullptr, "ret.tmp");
1208 }
1209
1210 // Create a slot to hold the second input if the inputs are passed
1211 // by pointer to the original kernel. We cannot directly pass a
1212 // pointer to the input buffer, because the kernel may modify its
1213 // inputs.
1214 llvm::Value *SecondInputTempBuf = nullptr;
1215 if (InputsPointerStyle) {
1216 SecondInputTempBuf = Builder.CreateAlloca(InTy, nullptr, "in.tmp");
1217 }
1218
1219 // Create a slot to accumulate temporary results, and fill it with
1220 // the first value.
1221 llvm::Value *AccumBuf = Builder.CreateAlloca(OutTy, nullptr, "accum");
1222 // Cast to OutPtrTy before loading, since AccumBuf has type OutPtrTy.
1223 llvm::LoadInst *FirstElementLoad = Builder.CreateLoad(
1224 Builder.CreatePointerCast(InBuf, OutPtrTy));
1225 if (gEnableRsTbaa) {
1226 FirstElementLoad->setMetadata("tbaa", TBAAAllocation);
1227 }
1228 // Memory operations with AccumBuf shouldn't be marked with
1229 // RenderScript TBAA, since this might conflict with TBAA metadata
1230 // in the kernel function when AccumBuf is passed by pointer.
1231 Builder.CreateStore(FirstElementLoad, AccumBuf);
1232
1233 // Loop body
1234
1235 // Create the loop structure. Note that the first input in the input buffer
1236 // has already been accumulated, so that we start at index 1.
1237 llvm::PHINode *IndVar;
1238 llvm::Value *Start = llvm::ConstantInt::get(Arg_len->getType(), 1);
1239 llvm::BasicBlock *Exit = createLoop(Builder, Start, Arg_len, &IndVar);
1240
1241 llvm::Value *InputPtr = Builder.CreateInBoundsGEP(InBuf, IndVar, "next_input.gep");
1242
1243 // Set up arguments and call the original (unexpanded) kernel.
1244 //
1245 // The original kernel can have at most 3 arguments, which is
1246 // achieved when the signature looks like:
1247 //
1248 // define void @func(foo* %ret, bar %a, bar %b)
1249 //
1250 // (bar can be one of foo/foo.coerce/foo*).
1251 llvm::SmallVector<llvm::Value *, 3> KernelArgs;
1252
1253 if (ReturnValuePointerStyle) {
1254 KernelArgs.push_back(ReturnBuf);
1255 }
1256
1257 if (InputsPointerStyle) {
1258 bccAssert(ReturnValuePointerStyle);
1259 // Because the return buffer is copied back into the
1260 // accumulator, it's okay if the accumulator is overwritten.
1261 KernelArgs.push_back(AccumBuf);
1262
1263 llvm::LoadInst *InputLoad = Builder.CreateLoad(InputPtr);
1264 if (gEnableRsTbaa) {
1265 InputLoad->setMetadata("tbaa", TBAAAllocation);
1266 }
1267 Builder.CreateStore(InputLoad, SecondInputTempBuf);
1268
1269 KernelArgs.push_back(SecondInputTempBuf);
1270 } else {
1271 // InPtrTy may be different from OutPtrTy (the type of
1272 // AccumBuf), so first cast the accumulator buffer to the
1273 // pointer type corresponding to the input argument type.
1274 KernelArgs.push_back(
1275 Builder.CreateLoad(Builder.CreatePointerCast(AccumBuf, InPtrTy)));
1276
1277 llvm::LoadInst *LoadedArg = Builder.CreateLoad(InputPtr);
1278 if (gEnableRsTbaa) {
1279 LoadedArg->setMetadata("tbaa", TBAAAllocation);
1280 }
1281 KernelArgs.push_back(LoadedArg);
1282 }
1283
1284 llvm::Value *RetVal = Builder.CreateCall(Function, KernelArgs);
1285
1286 const uint64_t ElementSize = DL.getTypeStoreSize(OutTy);
1287 const uint64_t ElementAlign = DL.getABITypeAlignment(OutTy);
1288
1289 // Store the output in the accumulator.
1290 if (ReturnValuePointerStyle) {
1291 Builder.CreateMemCpy(AccumBuf, ReturnBuf, ElementSize, ElementAlign);
1292 } else {
1293 Builder.CreateStore(RetVal, AccumBuf);
1294 }
1295
1296 // Loop exit
1297 Builder.SetInsertPoint(Exit, Exit->begin());
1298
1299 llvm::LoadInst *OutputLoad = Builder.CreateLoad(AccumBuf);
1300 llvm::StoreInst *OutputStore = Builder.CreateStore(OutputLoad, OutBuf);
1301 if (gEnableRsTbaa) {
1302 OutputStore->setMetadata("tbaa", TBAAAllocation);
1303 }
1304
1305 return true;
1306 }
1307
David Grosse32af522016-01-15 12:15:48 -08001308 // Certain categories of functions that make up a general
1309 // reduce-style kernel are called directly from the driver with no
1310 // expansion needed. For a function in such a category, we need to
1311 // promote linkage from static to external, to ensure that the
1312 // function is visible to the driver in the dynamic symbol table.
1313 // This promotion is safe because we don't have any kind of cross
1314 // translation unit linkage model (except for linking against
1315 // RenderScript libraries), so we do not risk name clashes.
1316 bool PromoteReduceNewFunction(const char *Name, FunctionSet &PromotedFunctions) {
1317 if (!Name) // a presumably-optional function that is not present
1318 return false;
1319
1320 llvm::Function *Fn = Module->getFunction(Name);
1321 bccAssert(Fn != nullptr);
1322 if (PromotedFunctions.insert(Fn).second) {
1323 bccAssert(Fn->getLinkage() == llvm::GlobalValue::InternalLinkage);
1324 Fn->setLinkage(llvm::GlobalValue::ExternalLinkage);
1325 return true;
1326 }
1327
1328 return false;
1329 }
1330
1331 // Expand the accumulator function for a general reduce-style kernel.
1332 //
1333 // The input is a function of the form
1334 //
1335 // define void @func(accumType* %accum, foo1 in1[, ... fooN inN] [, special arguments])
1336 //
1337 // where all arguments except the first are the same as for a foreach kernel.
1338 //
1339 // The input accumulator function gets expanded into a function of the form
1340 //
1341 // define void @func.expand(%RsExpandKernelDriverInfoPfx* %p, i32 %x1, i32 %x2, accumType* %accum)
1342 //
1343 // which performs a serial accumulation of elements [x1, x2) into *%accum.
1344 //
1345 // In pseudocode, @func.expand does:
1346 //
1347 // for (i = %x1; i < %x2; ++i) {
1348 // func(%accum,
1349 // *((foo1 *)p->inPtr[0] + i)[, ... *((fooN *)p->inPtr[N-1] + i)
1350 // [, p] [, i] [, p->current.y] [, p->current.z]);
1351 // }
1352 //
1353 // This is very similar to foreach kernel expansion with no output.
1354 bool ExpandReduceNewAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) {
1355 ALOGV("Expanding accumulator %s for general reduce kernel",
1356 FnAccumulator->getName().str().c_str());
1357
1358 // Create TBAA meta-data.
1359 llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
1360 *TBAAAllocation, *TBAAPointer;
1361 llvm::MDBuilder MDHelper(*Context);
1362 TBAARenderScriptDistinct =
1363 MDHelper.createTBAARoot(kRenderScriptTBAARootName);
1364 TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
1365 TBAARenderScriptDistinct);
1366 TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
1367 TBAARenderScript);
1368 TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
1369 TBAAAllocation, 0);
1370 TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
1371 TBAARenderScript);
1372 TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
1373
1374 auto AccumulatorArgIter = FnAccumulator->arg_begin();
1375
1376 // Create empty accumulator function.
1377 llvm::Function *FnExpandedAccumulator =
1378 createEmptyExpandedReduceNewAccumulator(FnAccumulator->getName(),
1379 (AccumulatorArgIter++)->getType());
1380
1381 // Extract the expanded accumulator's parameters. It is
1382 // guaranteed by createEmptyExpandedReduceNewAccumulator that
1383 // there will be 4 parameters.
1384 bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceNewAccumulatorParams);
1385 auto ExpandedAccumulatorArgIter = FnExpandedAccumulator->arg_begin();
1386 llvm::Value *Arg_p = &*(ExpandedAccumulatorArgIter++);
1387 llvm::Value *Arg_x1 = &*(ExpandedAccumulatorArgIter++);
1388 llvm::Value *Arg_x2 = &*(ExpandedAccumulatorArgIter++);
1389 llvm::Value *Arg_accum = &*(ExpandedAccumulatorArgIter++);
1390
1391 // Construct the actual function body.
1392 llvm::IRBuilder<> Builder(FnExpandedAccumulator->getEntryBlock().begin());
1393
1394 // Create the loop structure.
1395 llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
1396 llvm::PHINode *IndVar;
1397 createLoop(Builder, Arg_x1, Arg_x2, &IndVar);
1398
1399 llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
1400 const int CalleeArgsContextIdx =
1401 ExpandSpecialArguments(Signature, IndVar, Arg_p, Builder, CalleeArgs,
1402 [](){}, LoopHeader->getTerminator());
1403
1404 llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
1405 llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
1406 ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, AccumulatorArgIter, NumInputs,
1407 InBufPtrs, InStructTempSlots);
1408
1409 // Populate the actual call to the original accumulator.
1410 llvm::SmallVector<llvm::Value*, 8> RootArgs;
1411 RootArgs.push_back(Arg_accum);
1412 ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInputs, InBufPtrs, InStructTempSlots,
1413 IndVar, RootArgs);
1414 finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *FnAccumulator, Builder);
1415 Builder.CreateCall(FnAccumulator, RootArgs);
1416
1417 return true;
1418 }
1419
Tobias Grosser18a38a32013-07-26 15:03:03 -07001420 /// @brief Checks if pointers to allocation internals are exposed
1421 ///
1422 /// This function verifies if through the parameters passed to the kernel
1423 /// or through calls to the runtime library the script gains access to
1424 /// pointers pointing to data within a RenderScript Allocation.
1425 /// If we know we control all loads from and stores to data within
1426 /// RenderScript allocations and if we know the run-time internal accesses
1427 /// are all annotated with RenderScript TBAA metadata, only then we
1428 /// can safely use TBAA to distinguish between generic and from-allocation
1429 /// pointers.
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001430 bool allocPointersExposed(llvm::Module &Module) {
Tobias Grosser18a38a32013-07-26 15:03:03 -07001431 // Old style kernel function can expose pointers to elements within
1432 // allocations.
1433 // TODO: Extend analysis to allow simple cases of old-style kernels.
Stephen Hines25eb5862014-05-08 18:25:50 -07001434 for (size_t i = 0; i < mExportForEachCount; ++i) {
1435 const char *Name = mExportForEachNameList[i];
1436 uint32_t Signature = mExportForEachSignatureList[i];
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001437 if (Module.getFunction(Name) &&
Stephen Hinesd8817752013-08-02 17:56:51 -07001438 !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
Tobias Grosser18a38a32013-07-26 15:03:03 -07001439 return true;
1440 }
1441 }
1442
1443 // Check for library functions that expose a pointer to an Allocation or
1444 // that are not yet annotated with RenderScript-specific tbaa information.
Matt Walae2423782015-06-30 10:56:08 -07001445 static const std::vector<const char *> Funcs{
1446 // rsGetElementAt(...)
1447 "_Z14rsGetElementAt13rs_allocationj",
1448 "_Z14rsGetElementAt13rs_allocationjj",
1449 "_Z14rsGetElementAt13rs_allocationjjj",
Tobias Grosser18a38a32013-07-26 15:03:03 -07001450
Matt Walae2423782015-06-30 10:56:08 -07001451 // rsSetElementAt()
1452 "_Z14rsSetElementAt13rs_allocationPvj",
1453 "_Z14rsSetElementAt13rs_allocationPvjj",
1454 "_Z14rsSetElementAt13rs_allocationPvjjj",
Tobias Grosser18a38a32013-07-26 15:03:03 -07001455
Matt Walae2423782015-06-30 10:56:08 -07001456 // rsGetElementAtYuv_uchar_Y()
1457 "_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj",
1458
1459 // rsGetElementAtYuv_uchar_U()
1460 "_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj",
1461
1462 // rsGetElementAtYuv_uchar_V()
1463 "_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj",
1464 };
1465
1466 for (auto FI : Funcs) {
1467 llvm::Function *Function = Module.getFunction(FI);
Tobias Grosser18a38a32013-07-26 15:03:03 -07001468
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001469 if (!Function) {
Matt Walae2423782015-06-30 10:56:08 -07001470 ALOGE("Missing run-time function '%s'", FI);
Tobias Grosser18a38a32013-07-26 15:03:03 -07001471 return true;
1472 }
1473
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001474 if (Function->getNumUses() > 0) {
Tobias Grosser18a38a32013-07-26 15:03:03 -07001475 return true;
1476 }
1477 }
1478
1479 return false;
1480 }
1481
1482 /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
1483 ///
1484 /// The TBAA metadata used to annotate loads/stores from RenderScript
Chris Wailese10b8642014-07-15 13:18:45 -07001485 /// Allocations is generated in a separate TBAA tree with a
Stephen Hines354d1c12015-04-03 22:54:54 -07001486 /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for
1487 /// all nodes in unrelated alias analysis trees. This function makes the
1488 /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root),
Chris Wailese10b8642014-07-15 13:18:45 -07001489 /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With
1490 /// the connected trees every access to an Allocation is resolved to
1491 /// no-alias if compared to a normal C/C++ access.
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001492 void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
1493 llvm::MDBuilder MDHelper(*Context);
Stephen Hines354d1c12015-04-03 22:54:54 -07001494 llvm::MDNode *TBAARenderScriptDistinct =
1495 MDHelper.createTBAARoot("RenderScript Distinct TBAA");
1496 llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode(
1497 "RenderScript TBAA", TBAARenderScriptDistinct);
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001498 llvm::MDNode *TBAARoot = MDHelper.createTBAARoot("Simple C/C++ TBAA");
Stephen Hines354d1c12015-04-03 22:54:54 -07001499 TBAARenderScript->replaceOperandWith(1, TBAARoot);
Tobias Grosser18a38a32013-07-26 15:03:03 -07001500 }
1501
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001502 virtual bool runOnModule(llvm::Module &Module) {
1503 bool Changed = false;
1504 this->Module = &Module;
Matt Wala4e7a5062015-07-30 16:27:51 -07001505 Context = &Module.getContext();
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001506
Matt Wala4e7a5062015-07-30 16:27:51 -07001507 buildTypes();
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001508
1509 bcinfo::MetadataExtractor me(&Module);
Stephen Hines25eb5862014-05-08 18:25:50 -07001510 if (!me.extract()) {
1511 ALOGE("Could not extract metadata from module!");
1512 return false;
1513 }
Matt Wala4e7a5062015-07-30 16:27:51 -07001514
1515 // Expand forEach_* style kernels.
Stephen Hines25eb5862014-05-08 18:25:50 -07001516 mExportForEachCount = me.getExportForEachSignatureCount();
1517 mExportForEachNameList = me.getExportForEachNameList();
1518 mExportForEachSignatureList = me.getExportForEachSignatureList();
Stephen Hinesdb169182012-01-05 18:46:36 -08001519
Stephen Hines25eb5862014-05-08 18:25:50 -07001520 for (size_t i = 0; i < mExportForEachCount; ++i) {
1521 const char *name = mExportForEachNameList[i];
1522 uint32_t signature = mExportForEachSignatureList[i];
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001523 llvm::Function *kernel = Module.getFunction(name);
Tobias Grossercd5b6572013-07-01 15:04:07 -07001524 if (kernel) {
Stephen Hinesd8817752013-08-02 17:56:51 -07001525 if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
Matt Wala4e7a5062015-07-30 16:27:51 -07001526 Changed |= ExpandForEach(kernel, signature);
Tobias Grosseracde6012013-07-02 14:28:01 -07001527 kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1528 } else if (kernel->getReturnType()->isVoidTy()) {
Matt Wala4e7a5062015-07-30 16:27:51 -07001529 Changed |= ExpandOldStyleForEach(kernel, signature);
Tobias Grosseracde6012013-07-02 14:28:01 -07001530 kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1531 } else {
1532 // There are some graphics root functions that are not
1533 // expanded, but that will be called directly. For those
1534 // functions, we can not set the linkage to internal.
1535 }
Stephen Hinescc366e52012-02-21 17:22:04 -08001536 }
Stephen Hinesdb169182012-01-05 18:46:36 -08001537 }
1538
David Grosse32af522016-01-15 12:15:48 -08001539 // Expand simple reduce_* style kernels.
Matt Wala4e7a5062015-07-30 16:27:51 -07001540 mExportReduceCount = me.getExportReduceCount();
1541 mExportReduceNameList = me.getExportReduceNameList();
1542
1543 for (size_t i = 0; i < mExportReduceCount; ++i) {
1544 llvm::Function *kernel = Module.getFunction(mExportReduceNameList[i]);
1545 if (kernel) {
1546 Changed |= ExpandReduce(kernel);
1547 }
1548 }
1549
David Grosse32af522016-01-15 12:15:48 -08001550 // Process general reduce_* style functions.
1551 const size_t ExportReduceNewCount = me.getExportReduceNewCount();
1552 const bcinfo::MetadataExtractor::ReduceNew *ExportReduceNewList = me.getExportReduceNewList();
1553 // Note that functions can be shared between kernels
1554 FunctionSet PromotedFunctions, ExpandedAccumulators;
1555
1556 for (size_t i = 0; i < ExportReduceNewCount; ++i) {
1557 Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mInitializerName, PromotedFunctions);
1558 Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mOutConverterName, PromotedFunctions);
1559
1560 // Accumulator
1561 llvm::Function *accumulator = Module.getFunction(ExportReduceNewList[i].mAccumulatorName);
1562 bccAssert(accumulator != nullptr);
1563 if (ExpandedAccumulators.insert(accumulator).second)
1564 Changed |= ExpandReduceNewAccumulator(accumulator,
1565 ExportReduceNewList[i].mSignature,
1566 ExportReduceNewList[i].mInputCount);
1567 }
1568
Matt Wala4e7a5062015-07-30 16:27:51 -07001569 if (gEnableRsTbaa && !allocPointersExposed(Module)) {
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001570 connectRenderScriptTBAAMetadata(Module);
Tobias Grosser18a38a32013-07-26 15:03:03 -07001571 }
1572
Stephen Hinescc366e52012-02-21 17:22:04 -08001573 return Changed;
Stephen Hinesdb169182012-01-05 18:46:36 -08001574 }
1575
1576 virtual const char *getPassName() const {
Matt Wala4e7a5062015-07-30 16:27:51 -07001577 return "forEach_* and reduce_* function expansion";
Stephen Hinesdb169182012-01-05 18:46:36 -08001578 }
1579
Matt Wala4e7a5062015-07-30 16:27:51 -07001580}; // end RSKernelExpandPass
Stephen Hinesdb169182012-01-05 18:46:36 -08001581
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -07001582} // end anonymous namespace
1583
// Out-of-class definition of the pass's static ID member, initialized to 0.
Matt Wala4e7a5062015-07-30 16:27:51 -07001584char RSKernelExpandPass::ID = 0;
// File-local static object that registers RSKernelExpandPass with LLVM,
// passing "kernelexp" and "Kernel Expand Pass" as its two strings.
1585static llvm::RegisterPass<RSKernelExpandPass> X("kernelexp", "Kernel Expand Pass");
Stephen Hinesdb169182012-01-05 18:46:36 -08001586
1587namespace bcc {
1588
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -07001589llvm::ModulePass *
Matt Wala4e7a5062015-07-30 16:27:51 -07001590createRSKernelExpandPass(bool pEnableStepOpt) {
1591 return new RSKernelExpandPass(pEnableStepOpt);
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -07001592}
Stephen Hinesdb169182012-01-05 18:46:36 -08001593
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -07001594} // end namespace bcc