blob: 2f451d77e003ca7b0db04016abce5036e339b14e [file] [log] [blame]
/*
 * Copyright 2012, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
Jean-Luc Brouilleta2dd52f2017-02-16 20:57:26 -080017#include "Assert.h"
18#include "Log.h"
19#include "RSTransforms.h"
20#include "RSUtils.h"
21
22#include "bcc/Config.h"
23#include "bcinfo/MetadataExtractor.h"
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070024
David Gross97e50992017-03-29 20:52:30 +000025#include "slang_version.h"
26
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070027#include <cstdlib>
David Gross33cda5c2015-01-30 11:41:19 -080028#include <functional>
David Grosse32af522016-01-15 12:15:48 -080029#include <unordered_set>
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070030
Stephen Hinesb730e232013-01-09 15:31:36 -080031#include <llvm/IR/DerivedTypes.h>
32#include <llvm/IR/Function.h>
33#include <llvm/IR/Instructions.h>
34#include <llvm/IR/IRBuilder.h>
Tobias Grosser18a38a32013-07-26 15:03:03 -070035#include <llvm/IR/MDBuilder.h>
Stephen Hinesb730e232013-01-09 15:31:36 -080036#include <llvm/IR/Module.h>
Zonr Changc72c4dd2012-04-12 15:38:53 +080037#include <llvm/Pass.h>
Stephen Hines7ae3a822012-09-14 19:24:58 -070038#include <llvm/Support/raw_ostream.h>
Stephen Hinesb730e232013-01-09 15:31:36 -080039#include <llvm/IR/DataLayout.h>
Tobias Grossercd5b6572013-07-01 15:04:07 -070040#include <llvm/IR/Function.h>
Stephen Hinesb730e232013-01-09 15:31:36 -080041#include <llvm/IR/Type.h>
Tobias Grosser806075b2013-06-20 17:08:35 -070042#include <llvm/Transforms/Utils/BasicBlockUtils.h>
Stephen Hinesdb169182012-01-05 18:46:36 -080043
#ifndef __DISABLE_ASSERTS
// Expected parameter counts of the generated ".expand" functions.  They are
// referenced only from bccAssert() expressions, hence they are defined only
// when assertions are enabled.
constexpr int kNumExpandedForeachParams = 4;
constexpr int kNumExpandedReduceAccumulatorParams = 4;
#endif

// Name strings for the RenderScript TBAA metadata nodes.
constexpr char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA";
constexpr char kRenderScriptTBAANodeName[] = "RenderScript TBAA";
Chris Wailesbdbff6e2014-06-13 13:47:19 -070052
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070053using namespace bcc;
54
Stephen Hinesdb169182012-01-05 18:46:36 -080055namespace {
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -070056
// Compile-time switch for RenderScript TBAA support.
// NOTE(review): the code consulting this flag is outside this part of the
// file; presumably it gates emission of the TBAA metadata -- confirm.
constexpr bool gEnableRsTbaa = true;
Stephen Hines9c5263e2014-02-11 15:58:48 -080058
/* RSKernelExpandPass
 *
 * This pass generates functions used to implement calls via
 * rsForEach(), "foreach_<NAME>", or "reduce_<NAME>". We create an
 * inner loop for the function to be invoked over the appropriate data
 * cells of the input/output allocations (adjusting other relevant
 * parameters as we go). We support doing this for any forEach or
 * reduce style compute kernels.
 *
 * In the case of a foreach kernel or a simple reduction kernel, the
 * new function name is the original function name "<NAME>" followed
 * by ".expand" -- "<NAME>.expand".
 *
 * In the case of a general reduction kernel, the kernel's accumulator
 * function is the one transformed, and the new function name is the
 * original accumulator function name "<ACCUMFN>" followed by
 * ".expand" -- "<ACCUMFN>.expand". Using the name "<ACCUMFN>.expand"
 * for the function generated from the accumulator should not
 * introduce any possibility for name clashes today: The accumulator
 * function <ACCUMFN> must be static, so it cannot also serve as a
 * foreach kernel; and the code for <ACCUMFN>.expand depends only on
 * <ACCUMFN>, not on any other properties of the reduction kernel, so
 * any reduction kernels that share the accumulator <ACCUMFN> can
 * share <ACCUMFN>.expand also.
 *
 * Note that this pass does not delete the original function <NAME> or
 * <ACCUMFN>. However, if it is inlined into the newly-generated
 * function and not otherwise referenced, then a subsequent pass may
 * delete it.
 */
Matt Wala4e7a5062015-07-30 16:27:51 -070089class RSKernelExpandPass : public llvm::ModulePass {
David Gross33cda5c2015-01-30 11:41:19 -080090public:
Stephen Hinesdb169182012-01-05 18:46:36 -080091 static char ID;
92
David Gross33cda5c2015-01-30 11:41:19 -080093private:
David Gross97e50992017-03-29 20:52:30 +000094 static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h
David Grosse44a3522015-03-13 15:24:27 -070095
David Grosse32af522016-01-15 12:15:48 -080096 typedef std::unordered_set<llvm::Function *> FunctionSet;
97
// Field indices of the RsLaunchDimensions struct, in declaration order.
// The values are used as GEP indices, so they must stay dense and 0-based.
enum RsLaunchDimensionsField {
  RsLaunchDimensionsFieldX = 0,  // uint32_t x
  RsLaunchDimensionsFieldY,      // uint32_t y
  RsLaunchDimensionsFieldZ,      // uint32_t z
  RsLaunchDimensionsFieldLod,    // uint32_t lod
  RsLaunchDimensionsFieldFace,   // uint32_t face
  RsLaunchDimensionsFieldArray,  // uint32_t array[4]

  RsLaunchDimensionsFieldCount   // number of fields, not a field itself
};
108
// Field indices of the RsExpandKernelDriverInfoPfx struct, in declaration
// order.  The values are used as GEP indices, so they must stay dense and
// 0-based.
enum RsExpandKernelDriverInfoPfxField {
  RsExpandKernelDriverInfoPfxFieldInPtr = 0,  // inPtr[RS_KERNEL_INPUT_LIMIT]
  RsExpandKernelDriverInfoPfxFieldInStride,   // inStride[RS_KERNEL_INPUT_LIMIT]
  RsExpandKernelDriverInfoPfxFieldInLen,      // inLen
  RsExpandKernelDriverInfoPfxFieldOutPtr,     // outPtr[RS_KERNEL_INPUT_LIMIT]
  RsExpandKernelDriverInfoPfxFieldOutStride,  // outStride[RS_KERNEL_INPUT_LIMIT]
  RsExpandKernelDriverInfoPfxFieldOutLen,     // outLen
  RsExpandKernelDriverInfoPfxFieldDim,        // dim
  RsExpandKernelDriverInfoPfxFieldCurrent,    // current
  RsExpandKernelDriverInfoPfxFieldUsr,        // usr
  // usrLen.  NOTE(review): "UsLenr" looks like a misspelling of "UsrLen"; the
  // identifier is left untouched because other code may refer to it.
  RsExpandKernelDriverInfoPfxFieldUsLenr,

  RsExpandKernelDriverInfoPfxFieldCount       // number of fields, not a field itself
};
David Gross33cda5c2015-01-30 11:41:19 -0800123
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700124 llvm::Module *Module;
125 llvm::LLVMContext *Context;
126
127 /*
Matt Wala4e7a5062015-07-30 16:27:51 -0700128 * Pointers to LLVM type information for the the function signatures
129 * for expanded functions. These must be re-calculated for each module
130 * the pass is run on.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700131 */
David Gross9fa4d442016-06-02 14:46:55 -0700132 llvm::FunctionType *ExpandedForEachType;
David Grosse32af522016-01-15 12:15:48 -0800133 llvm::Type *RsExpandKernelDriverInfoPfxTy;
Stephen Hinesdb169182012-01-05 18:46:36 -0800134
David Gross97e50992017-03-29 20:52:30 +0000135 // Initialized when we begin to process each Module
136 bool mStructExplicitlyPaddedBySlang;
Stephen Hines25eb5862014-05-08 18:25:50 -0700137 uint32_t mExportForEachCount;
138 const char **mExportForEachNameList;
139 const uint32_t *mExportForEachSignatureList;
Stephen Hinescc366e52012-02-21 17:22:04 -0800140
Stephen Hines2b040862012-07-27 20:18:08 -0700141 // Turns on optimization of allocation stride values.
142 bool mEnableStepOpt;
143
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700144 uint32_t getRootSignature(llvm::Function *Function) {
Stephen Hinesdb169182012-01-05 18:46:36 -0800145 const llvm::NamedMDNode *ExportForEachMetadata =
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700146 Module->getNamedMetadata("#rs_export_foreach");
Stephen Hinesdb169182012-01-05 18:46:36 -0800147
148 if (!ExportForEachMetadata) {
149 llvm::SmallVector<llvm::Type*, 8> RootArgTys;
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700150 for (llvm::Function::arg_iterator B = Function->arg_begin(),
151 E = Function->arg_end();
Stephen Hinesdb169182012-01-05 18:46:36 -0800152 B != E;
153 ++B) {
154 RootArgTys.push_back(B->getType());
155 }
156
157 // For pre-ICS bitcode, we may not have signature information. In that
158 // case, we use the size of the RootArgTys to select the number of
159 // arguments.
160 return (1 << RootArgTys.size()) - 1;
161 }
162
Stephen Hines7ae3a822012-09-14 19:24:58 -0700163 if (ExportForEachMetadata->getNumOperands() == 0) {
164 return 0;
165 }
166
Stephen Hines6e9e89d2012-07-27 19:16:04 -0700167 bccAssert(ExportForEachMetadata->getNumOperands() > 0);
Stephen Hinesdb169182012-01-05 18:46:36 -0800168
Stephen Hinescc366e52012-02-21 17:22:04 -0800169 // We only handle the case for legacy root() functions here, so this is
170 // hard-coded to look at only the first such function.
Stephen Hinesdb169182012-01-05 18:46:36 -0800171 llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
Chris Wailes900c6c12014-08-13 15:40:00 -0700172 if (SigNode != nullptr && SigNode->getNumOperands() == 1) {
Stephen Hines1bd9f622015-03-18 14:53:10 -0700173 llvm::Metadata *SigMD = SigNode->getOperand(0);
174 if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) {
175 llvm::StringRef SigString = SigS->getString();
Stephen Hinesdb169182012-01-05 18:46:36 -0800176 uint32_t Signature = 0;
177 if (SigString.getAsInteger(10, Signature)) {
178 ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
179 return 0;
180 }
181 return Signature;
182 }
183 }
184
185 return 0;
186 }
187
Tim Murray429d94a2014-10-30 15:34:01 -0700188 bool isStepOptSupported(llvm::Type *AllocType) {
189
190 llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
191 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
192
193 if (mEnableStepOpt) {
194 return false;
195 }
196
197 if (AllocType == VoidPtrTy) {
198 return false;
199 }
200
201 if (!PT) {
202 return false;
203 }
204
205 // remaining conditions are 64-bit only
206 if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
207 return true;
208 }
209
210 // coerce suggests an upconverted struct type, which we can't support
211 if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
212 return false;
213 }
214
215 // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported
216 llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
217 llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
218 if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
219 return false;
220 }
221
222 return true;
223 }
224
Stephen Hines2b040862012-07-27 20:18:08 -0700225 // Get the actual value we should use to step through an allocation.
Tobias Grosser7b662902013-06-21 17:07:39 -0700226 //
227 // Normally the value we use to step through an allocation is given to us by
228 // the driver. However, for certain primitive data types, we can derive an
229 // integer constant for the step value. We use this integer constant whenever
230 // possible to allow further compiler optimizations to take place.
231 //
Stephen Hinesb730e232013-01-09 15:31:36 -0800232 // DL - Target Data size/layout information.
Stephen Hines2b040862012-07-27 20:18:08 -0700233 // T - Type of allocation (should be a pointer).
234 // OrigStep - Original step increment (root.expand() input from driver).
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700235 llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
Stephen Hines2b040862012-07-27 20:18:08 -0700236 llvm::Value *OrigStep) {
Stephen Hinesb730e232013-01-09 15:31:36 -0800237 bccAssert(DL);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700238 bccAssert(AllocType);
Stephen Hines2b040862012-07-27 20:18:08 -0700239 bccAssert(OrigStep);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700240 llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
Tim Murray429d94a2014-10-30 15:34:01 -0700241 if (isStepOptSupported(AllocType)) {
Stephen Hines2b040862012-07-27 20:18:08 -0700242 llvm::Type *ET = PT->getElementType();
Stephen Hinesb730e232013-01-09 15:31:36 -0800243 uint64_t ETSize = DL->getTypeAllocSize(ET);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700244 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
Stephen Hines2b040862012-07-27 20:18:08 -0700245 return llvm::ConstantInt::get(Int32Ty, ETSize);
246 } else {
247 return OrigStep;
248 }
249 }
250
Chris Wailes097ca142014-07-08 15:57:12 -0700251 /// Builds the types required by the pass for the given context.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700252 void buildTypes(void) {
David Grosse44a3522015-03-13 15:24:27 -0700253 // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700254
David Grosse44a3522015-03-13 15:24:27 -0700255 llvm::Type *Int8Ty = llvm::Type::getInt8Ty(*Context);
256 llvm::Type *Int8PtrTy = Int8Ty->getPointerTo();
257 llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT);
258 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
259 llvm::Type *Int32ArrayInputLimitTy = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT);
260 llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
261 llvm::Type *Int32Array4Ty = llvm::ArrayType::get(Int32Ty, 4);
Chris Wailes097ca142014-07-08 15:57:12 -0700262
263 /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
Stephen Hinesdb169182012-01-05 18:46:36 -0800264 *
David Grosse44a3522015-03-13 15:24:27 -0700265 * struct RsLaunchDimensions {
266 * uint32_t x;
Stephen Hinesdb169182012-01-05 18:46:36 -0800267 * uint32_t y;
268 * uint32_t z;
David Grosse44a3522015-03-13 15:24:27 -0700269 * uint32_t lod;
270 * uint32_t face;
271 * uint32_t array[4];
Stephen Hinesdb169182012-01-05 18:46:36 -0800272 * };
273 */
David Grosse44a3522015-03-13 15:24:27 -0700274 llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes;
275 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t x
276 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t y
277 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t z
278 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t lod
279 RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t face
280 RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4]
281 llvm::StructType *RsLaunchDimensionsTy =
282 llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions");
Chris Wailes881cda42014-06-23 11:27:41 -0700283
David Gross1d93a192015-03-25 14:59:27 -0700284 /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h:
David Grosse44a3522015-03-13 15:24:27 -0700285 *
286 * struct RsExpandKernelDriverInfoPfx {
287 * const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
288 * uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
289 * uint32_t inLen;
290 *
291 * uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
292 * uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
293 * uint32_t outLen;
294 *
295 * // Dimension of the launch
296 * RsLaunchDimensions dim;
297 *
298 * // The walking iterator of the launch
299 * RsLaunchDimensions current;
300 *
301 * const void *usr;
302 * uint32_t usrLen;
303 *
304 * // Items below this line are not used by the compiler and can be change in the driver.
305 * // So the compiler must assume there are an unknown number of fields of unknown type
306 * // beginning here.
307 * };
David Gross1d93a192015-03-25 14:59:27 -0700308 *
309 * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp).
David Grosse44a3522015-03-13 15:24:27 -0700310 */
311 llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes;
312 RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]
313 RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t inStride[RS_KERNEL_INPUT_LIMIT]
314 RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t inLen
315 RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]
316 RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t outStride[RS_KERNEL_INPUT_LIMIT]
317 RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t outLen
318 RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions dim
319 RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions current
320 RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy); // const void *usr
321 RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t usrLen
David Grosse32af522016-01-15 12:15:48 -0800322 RsExpandKernelDriverInfoPfxTy =
David Grosse44a3522015-03-13 15:24:27 -0700323 llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx");
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700324
325 // Create the function type for expanded kernels.
Matt Wala4e7a5062015-07-30 16:27:51 -0700326 llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700327
David Grosse44a3522015-03-13 15:24:27 -0700328 llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo();
Matt Wala4e7a5062015-07-30 16:27:51 -0700329 // void (const RsExpandKernelDriverInfoPfxTy *p, uint32_t x1, uint32_t x2, uint32_t outstep)
330 ExpandedForEachType = llvm::FunctionType::get(VoidTy,
331 {RsExpandKernelDriverInfoPfxPtrTy, Int32Ty, Int32Ty, Int32Ty}, false);
Tobias Grosser8ae46072013-06-20 14:00:31 -0700332 }
333
Matt Wala4e7a5062015-07-30 16:27:51 -0700334 /// @brief Create skeleton of the expanded foreach kernel.
Tobias Grosser357b5862013-06-20 14:12:46 -0700335 ///
336 /// This creates a function with the following signature:
337 ///
338 /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
Chris Wailes5010f642014-07-25 15:31:32 -0700339 /// uint32_t outstep)
Tobias Grosser357b5862013-06-20 14:12:46 -0700340 ///
Matt Wala4e7a5062015-07-30 16:27:51 -0700341 llvm::Function *createEmptyExpandedForEachKernel(llvm::StringRef OldName) {
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700342 llvm::Function *ExpandedFunction =
Matt Wala4e7a5062015-07-30 16:27:51 -0700343 llvm::Function::Create(ExpandedForEachType,
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700344 llvm::GlobalValue::ExternalLinkage,
345 OldName + ".expand", Module);
Matt Wala4e7a5062015-07-30 16:27:51 -0700346 bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700347 llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700348 (AI++)->setName("p");
349 (AI++)->setName("x1");
350 (AI++)->setName("x2");
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700351 (AI++)->setName("arg_outstep");
Matt Wala4e7a5062015-07-30 16:27:51 -0700352 llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
353 ExpandedFunction);
354 llvm::IRBuilder<> Builder(Begin);
355 Builder.CreateRetVoid();
356 return ExpandedFunction;
357 }
358
David Grosse32af522016-01-15 12:15:48 -0800359 // Create skeleton of a general reduce kernel's expanded accumulator.
360 //
361 // This creates a function with the following signature:
362 //
363 // void @func.expand(%RsExpandKernelDriverInfoPfx* nocapture %p,
364 // i32 %x1, i32 %x2, accumType* nocapture %accum)
365 //
David Gross9fa4d442016-06-02 14:46:55 -0700366 llvm::Function *createEmptyExpandedReduceAccumulator(llvm::StringRef OldName,
367 llvm::Type *AccumArgTy) {
David Grosse32af522016-01-15 12:15:48 -0800368 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
369 llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
David Gross9fa4d442016-06-02 14:46:55 -0700370 llvm::FunctionType *ExpandedReduceAccumulatorType =
David Grosse32af522016-01-15 12:15:48 -0800371 llvm::FunctionType::get(VoidTy,
372 {RsExpandKernelDriverInfoPfxTy->getPointerTo(),
373 Int32Ty, Int32Ty, AccumArgTy}, false);
374 llvm::Function *FnExpandedAccumulator =
David Gross9fa4d442016-06-02 14:46:55 -0700375 llvm::Function::Create(ExpandedReduceAccumulatorType,
David Grosse32af522016-01-15 12:15:48 -0800376 llvm::GlobalValue::ExternalLinkage,
377 OldName + ".expand", Module);
David Gross9fa4d442016-06-02 14:46:55 -0700378 bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams);
David Grosse32af522016-01-15 12:15:48 -0800379
380 llvm::Function::arg_iterator AI = FnExpandedAccumulator->arg_begin();
381
382 using llvm::Attribute;
383
384 llvm::Argument *Arg_p = &(*AI++);
385 Arg_p->setName("p");
386 Arg_p->addAttr(llvm::AttributeSet::get(*Context, Arg_p->getArgNo() + 1,
387 llvm::makeArrayRef(Attribute::NoCapture)));
388
389 llvm::Argument *Arg_x1 = &(*AI++);
390 Arg_x1->setName("x1");
391
392 llvm::Argument *Arg_x2 = &(*AI++);
393 Arg_x2->setName("x2");
394
395 llvm::Argument *Arg_accum = &(*AI++);
396 Arg_accum->setName("accum");
397 Arg_accum->addAttr(llvm::AttributeSet::get(*Context, Arg_accum->getArgNo() + 1,
398 llvm::makeArrayRef(Attribute::NoCapture)));
399
400 llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
401 FnExpandedAccumulator);
402 llvm::IRBuilder<> Builder(Begin);
403 Builder.CreateRetVoid();
404
405 return FnExpandedAccumulator;
406 }
407
Tobias Grossere4a73f62013-06-21 15:35:03 -0700408 /// @brief Create an empty loop
409 ///
410 /// Create a loop of the form:
411 ///
412 /// for (i = LowerBound; i < UpperBound; i++)
413 /// ;
414 ///
415 /// After the loop has been created, the builder is set such that
416 /// instructions can be added to the loop body.
417 ///
418 /// @param Builder The builder to use to build this loop. The current
419 /// position of the builder is the position the loop
420 /// will be inserted.
421 /// @param LowerBound The first value of the loop iterator
422 /// @param UpperBound The maximal value of the loop iterator
423 /// @param LoopIV A reference that will be set to the loop iterator.
424 /// @return The BasicBlock that will be executed after the loop.
425 llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
426 llvm::Value *LowerBound,
427 llvm::Value *UpperBound,
Dean De Leo4165d292015-11-25 12:55:21 +0000428 llvm::Value **LoopIV) {
David Grossc2ca7422015-05-29 14:54:33 -0700429 bccAssert(LowerBound->getType() == UpperBound->getType());
Tobias Grossere4a73f62013-06-21 15:35:03 -0700430
431 llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
Dean De Leo4165d292015-11-25 12:55:21 +0000432 llvm::Value *Cond, *IVNext, *IV, *IVVar;
Tobias Grossere4a73f62013-06-21 15:35:03 -0700433
434 CondBB = Builder.GetInsertBlock();
Pirama Arumuga Nainarf229c402016-03-06 23:05:45 -0800435 AfterBB = llvm::SplitBlock(CondBB, &*Builder.GetInsertPoint(), nullptr, nullptr);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700436 HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
Tobias Grossere4a73f62013-06-21 15:35:03 -0700437
Dean De Leo4165d292015-11-25 12:55:21 +0000438 CondBB->getTerminator()->eraseFromParent();
439 Builder.SetInsertPoint(CondBB);
440
441 // decltype(LowerBound) *ivvar = alloca(sizeof(int))
442 // *ivvar = LowerBound
443 IVVar = Builder.CreateAlloca(LowerBound->getType(), nullptr, BCC_INDEX_VAR_NAME);
444 Builder.CreateStore(LowerBound, IVVar);
445
Tobias Grossere4a73f62013-06-21 15:35:03 -0700446 // if (LowerBound < Upperbound)
447 // goto LoopHeader
448 // else
449 // goto AfterBB
Tobias Grossere87a0512013-06-25 15:31:11 -0700450 Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
Tobias Grossere4a73f62013-06-21 15:35:03 -0700451 Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
452
Dean De Leo4165d292015-11-25 12:55:21 +0000453 // LoopHeader:
454 // iv = *ivvar
455 // <insertion point here>
456 // iv.next = iv + 1
457 // *ivvar = iv.next
458 // if (iv.next < Upperbound)
459 // goto LoopHeader
460 // else
461 // goto AfterBB
462 // AfterBB:
Tobias Grossere4a73f62013-06-21 15:35:03 -0700463 Builder.SetInsertPoint(HeaderBB);
Dean De Leo4165d292015-11-25 12:55:21 +0000464 IV = Builder.CreateLoad(IVVar, "X");
Tobias Grossere4a73f62013-06-21 15:35:03 -0700465 IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
Dean De Leo4165d292015-11-25 12:55:21 +0000466 Builder.CreateStore(IVNext, IVVar);
Tobias Grossere87a0512013-06-25 15:31:11 -0700467 Cond = Builder.CreateICmpULT(IVNext, UpperBound);
Tobias Grossere4a73f62013-06-21 15:35:03 -0700468 Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
469 AfterBB->setName("Exit");
Dean De Leo4165d292015-11-25 12:55:21 +0000470 Builder.SetInsertPoint(llvm::cast<llvm::Instruction>(IVNext));
471
472 // Record information about this loop.
Tobias Grossere4a73f62013-06-21 15:35:03 -0700473 *LoopIV = IV;
474 return AfterBB;
475 }
476
David Gross28c17992015-07-07 16:44:33 -0700477 // Finish building the outgoing argument list for calling a ForEach-able function.
478 //
479 // ArgVector - on input, the non-special arguments
480 // on output, the non-special arguments combined with the special arguments
481 // from SpecialArgVector
482 // SpecialArgVector - special arguments (from ExpandSpecialArguments())
483 // SpecialArgContextIdx - return value of ExpandSpecialArguments()
484 // (position of context argument in SpecialArgVector)
485 // CalleeFunction - the ForEach-able function being called
486 // Builder - for inserting code into the caller function
487 template<unsigned int ArgVectorLen, unsigned int SpecialArgVectorLen>
488 void finishArgList( llvm::SmallVector<llvm::Value *, ArgVectorLen> &ArgVector,
489 const llvm::SmallVector<llvm::Value *, SpecialArgVectorLen> &SpecialArgVector,
490 const int SpecialArgContextIdx,
491 const llvm::Function &CalleeFunction,
492 llvm::IRBuilder<> &CallerBuilder) {
493 /* The context argument (if any) is a pointer to an opaque user-visible type that differs from
494 * the RsExpandKernelDriverInfoPfx type used in the function we are generating (although the
495 * two types represent the same thing). Therefore, we must introduce a pointer cast when
496 * generating a call to the kernel function.
497 */
498 const int ArgContextIdx =
499 SpecialArgContextIdx >= 0 ? (ArgVector.size() + SpecialArgContextIdx) : SpecialArgContextIdx;
500 ArgVector.append(SpecialArgVector.begin(), SpecialArgVector.end());
501 if (ArgContextIdx >= 0) {
502 llvm::Type *ContextArgType = nullptr;
503 int ArgIdx = ArgContextIdx;
504 for (const auto &Arg : CalleeFunction.getArgumentList()) {
505 if (!ArgIdx--) {
506 ContextArgType = Arg.getType();
507 break;
508 }
509 }
510 bccAssert(ContextArgType);
511 ArgVector[ArgContextIdx] = CallerBuilder.CreatePointerCast(ArgVector[ArgContextIdx], ContextArgType);
512 }
513 }
514
Matt Wala083ef3c2015-07-22 18:58:05 -0700515 // GEPHelper() returns a SmallVector of values suitable for passing
516 // to IRBuilder::CreateGEP(), and SmallGEPIndices is a typedef for
517 // the returned data type. It is sized so that the SmallVector
518 // returned by GEPHelper() never needs to do a heap allocation for
519 // any list of GEP indices it encounters in the code.
520 typedef llvm::SmallVector<llvm::Value *, 3> SmallGEPIndices;
521
522 // Helper for turning a list of constant integer GEP indices into a
523 // SmallVector of llvm::Value*. The return value is suitable for
524 // passing to a GetElementPtrInst constructor or IRBuilder::CreateGEP().
525 //
526 // Inputs:
527 // I32Args should be integers which represent the index arguments
528 // to a GEP instruction.
529 //
530 // Returns:
531 // Returns a SmallVector of ConstantInts.
Matt Wala4e7a5062015-07-30 16:27:51 -0700532 SmallGEPIndices GEPHelper(const std::initializer_list<int32_t> I32Args) {
Matt Wala083ef3c2015-07-22 18:58:05 -0700533 SmallGEPIndices Out(I32Args.size());
534 llvm::IntegerType *I32Ty = llvm::Type::getInt32Ty(*Context);
535 std::transform(I32Args.begin(), I32Args.end(), Out.begin(),
536 [I32Ty](int32_t Arg) { return llvm::ConstantInt::get(I32Ty, Arg); });
537 return Out;
538 }
539
Tobias Grosser8ae46072013-06-20 14:00:31 -0700540public:
Chih-Hung Hsieh7e920a72016-04-29 14:51:50 -0700541 explicit RSKernelExpandPass(bool pEnableStepOpt = true)
Chris Wailes900c6c12014-08-13 15:40:00 -0700542 : ModulePass(ID), Module(nullptr), Context(nullptr),
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700543 mEnableStepOpt(pEnableStepOpt) {
544
Tobias Grosser8ae46072013-06-20 14:00:31 -0700545 }
546
Stephen Hinesc754d492015-01-08 16:00:50 -0800547 virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
548 // This pass does not use any other analysis passes, but it does
549 // add/wrap the existing functions in the module (thus altering the CFG).
550 }
551
David Gross33cda5c2015-01-30 11:41:19 -0800552 // Build contribution to outgoing argument list for calling a
David Grosse32af522016-01-15 12:15:48 -0800553 // ForEach-able function or a general reduction accumulator
554 // function, based on the special parameters of that function.
David Gross33cda5c2015-01-30 11:41:19 -0800555 //
David Grosse32af522016-01-15 12:15:48 -0800556 // Signature - metadata bits for the signature of the callee
David Gross33cda5c2015-01-30 11:41:19 -0800557 // X, Arg_p - values derived directly from expanded function,
David Grosse32af522016-01-15 12:15:48 -0800558 // suitable for computing arguments for the callee
David Gross33cda5c2015-01-30 11:41:19 -0800559 // CalleeArgs - contribution is accumulated here
560 // Bump - invoked once for each contributed outgoing argument
Matt Wala083ef3c2015-07-22 18:58:05 -0700561 // LoopHeaderInsertionPoint - an Instruction in the loop header, before which
562 // this function can insert loop-invariant loads
David Gross28c17992015-07-07 16:44:33 -0700563 //
564 // Return value is the (zero-based) position of the context (Arg_p)
565 // argument in the CalleeArgs vector, or a negative value if the
566 // context argument is not placed in the CalleeArgs vector.
567 int ExpandSpecialArguments(uint32_t Signature,
568 llvm::Value *X,
569 llvm::Value *Arg_p,
570 llvm::IRBuilder<> &Builder,
571 llvm::SmallVector<llvm::Value*, 8> &CalleeArgs,
Chih-Hung Hsieh8a019dd2016-08-12 15:49:55 -0700572 const std::function<void ()> &Bump,
Matt Wala083ef3c2015-07-22 18:58:05 -0700573 llvm::Instruction *LoopHeaderInsertionPoint) {
David Gross33cda5c2015-01-30 11:41:19 -0800574
David Gross28c17992015-07-07 16:44:33 -0700575 bccAssert(CalleeArgs.empty());
576
577 int Return = -1;
David Gross33cda5c2015-01-30 11:41:19 -0800578 if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) {
579 CalleeArgs.push_back(Arg_p);
580 Bump();
David Gross28c17992015-07-07 16:44:33 -0700581 Return = CalleeArgs.size() - 1;
David Gross33cda5c2015-01-30 11:41:19 -0800582 }
583
584 if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
585 CalleeArgs.push_back(X);
586 Bump();
587 }
588
David Grosse44a3522015-03-13 15:24:27 -0700589 if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) ||
590 bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
Matt Wala083ef3c2015-07-22 18:58:05 -0700591 bccAssert(LoopHeaderInsertionPoint);
David Gross33cda5c2015-01-30 11:41:19 -0800592
Matt Wala083ef3c2015-07-22 18:58:05 -0700593 // Y and Z are loop invariant, so they can be hoisted out of the
594 // loop. Set the IRBuilder insertion point to the loop header.
595 auto OldInsertionPoint = Builder.saveIP();
596 Builder.SetInsertPoint(LoopHeaderInsertionPoint);
David Grosse44a3522015-03-13 15:24:27 -0700597
598 if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
Matt Wala083ef3c2015-07-22 18:58:05 -0700599 SmallGEPIndices YValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
600 RsLaunchDimensionsFieldY}));
601 llvm::Value *YAddr = Builder.CreateInBoundsGEP(Arg_p, YValueGEP, "Y.gep");
602 CalleeArgs.push_back(Builder.CreateLoad(YAddr, "Y"));
David Grosse44a3522015-03-13 15:24:27 -0700603 Bump();
604 }
605
606 if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
Matt Wala083ef3c2015-07-22 18:58:05 -0700607 SmallGEPIndices ZValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
608 RsLaunchDimensionsFieldZ}));
609 llvm::Value *ZAddr = Builder.CreateInBoundsGEP(Arg_p, ZValueGEP, "Z.gep");
610 CalleeArgs.push_back(Builder.CreateLoad(ZAddr, "Z"));
David Grosse44a3522015-03-13 15:24:27 -0700611 Bump();
612 }
Matt Wala083ef3c2015-07-22 18:58:05 -0700613
614 Builder.restoreIP(OldInsertionPoint);
David Gross33cda5c2015-01-30 11:41:19 -0800615 }
David Gross28c17992015-07-07 16:44:33 -0700616
617 return Return;
David Gross33cda5c2015-01-30 11:41:19 -0800618 }
619
David Grosse32af522016-01-15 12:15:48 -0800620 // Generate loop-invariant input processing setup code for an expanded
621 // ForEach-able function or an expanded general reduction accumulator
622 // function.
623 //
624 // LoopHeader - block at the end of which the setup code will be inserted
625 // Arg_p - RSKernelDriverInfo pointer passed to the expanded function
626 // TBAAPointer - metadata for marking loads of pointer values out of RSKernelDriverInfo
627 // ArgIter - iterator pointing to first input of the UNexpanded function
628 // NumInputs - number of inputs (NOT number of ARGUMENTS)
629 //
Yong Chenf039d982015-10-21 13:28:09 +0800630 // InTypes[] - this function saves input type, they will be used in ExpandInputsBody().
631 // InBufPtrs[] - this function sets each array element to point to the first cell / byte
632 // (byte for x86, cell for other platforms) of the corresponding input allocation
David Grosse32af522016-01-15 12:15:48 -0800633 // InStructTempSlots[] - this function sets each array element either to nullptr
634 // or to the result of an alloca (for the case where the
635 // calling convention dictates that a value must be passed
636 // by reference, and so we need a stacked temporary to hold
637 // a copy of that value)
638 void ExpandInputsLoopInvariant(llvm::IRBuilder<> &Builder, llvm::BasicBlock *LoopHeader,
639 llvm::Value *Arg_p,
640 llvm::MDNode *TBAAPointer,
641 llvm::Function::arg_iterator ArgIter,
642 const size_t NumInputs,
Yong Chenf039d982015-10-21 13:28:09 +0800643 llvm::SmallVectorImpl<llvm::Type *> &InTypes,
David Grosse32af522016-01-15 12:15:48 -0800644 llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
645 llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots) {
646 bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT);
647
648 // Extract information about input slots. The work done
649 // here is loop-invariant, so we can hoist the operations out of the loop.
650 auto OldInsertionPoint = Builder.saveIP();
651 Builder.SetInsertPoint(LoopHeader->getTerminator());
652
653 for (size_t InputIndex = 0; InputIndex < NumInputs; ++InputIndex, ArgIter++) {
654 llvm::Type *InType = ArgIter->getType();
655
656 /*
657 * AArch64 calling conventions dictate that structs of sufficient size
658 * get passed by pointer instead of passed by value. This, combined
659 * with the fact that we don't allow kernels to operate on pointer
660 * data means that if we see a kernel with a pointer parameter we know
661 * that it is a struct input that has been promoted. As such we don't
662 * need to convert its type to a pointer. Later we will need to know
663 * to create a temporary copy on the stack, so we save this information
664 * in InStructTempSlots.
665 */
666 if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) {
667 llvm::Type *ElementType = PtrType->getElementType();
668 InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr,
669 "input_struct_slot"));
670 } else {
671 InType = InType->getPointerTo();
672 InStructTempSlots.push_back(nullptr);
673 }
674
675 SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr,
676 static_cast<int32_t>(InputIndex)}));
677 llvm::Value *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep");
678 llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf");
Yong Chenf039d982015-10-21 13:28:09 +0800679
680 llvm::Value *CastInBufPtr = nullptr;
David Gross97e50992017-03-29 20:52:30 +0000681 if (mStructExplicitlyPaddedBySlang || (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING)) {
Yong Chenf039d982015-10-21 13:28:09 +0800682 CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in");
683 } else {
684 // The disagreement between module and x86 target machine datalayout
685 // causes mismatched input/output data offset between slang reflected
686 // code and bcc codegen for GetElementPtr. To solve this issue, skip the
687 // cast to InType and leave CastInBufPtr as an int8_t*. The buffer is
688 // later indexed with an explicit byte offset computed based on
David Gross97e50992017-03-29 20:52:30 +0000689 // X86_CUSTOM_DL_STRING and then bitcast to actual input type.
Yong Chenf039d982015-10-21 13:28:09 +0800690 CastInBufPtr = InBufPtr;
691 }
David Grosse32af522016-01-15 12:15:48 -0800692
693 if (gEnableRsTbaa) {
694 InBufPtr->setMetadata("tbaa", TBAAPointer);
695 }
696
Yong Chenf039d982015-10-21 13:28:09 +0800697 InTypes.push_back(InType);
David Grosse32af522016-01-15 12:15:48 -0800698 InBufPtrs.push_back(CastInBufPtr);
699 }
700
701 Builder.restoreIP(OldInsertionPoint);
702 }
703
704 // Generate loop-varying input processing code for an expanded ForEach-able function
705 // or an expanded general reduction accumulator function. Also, for the call to the
706 // UNexpanded function, collect the portion of the argument list corresponding to the
707 // inputs.
708 //
709 // Arg_x1 - first X coordinate to be processed by the expanded function
710 // TBAAAllocation - metadata for marking loads of input values out of allocations
711 // NumInputs -- number of inputs (NOT number of ARGUMENTS)
Yong Chenf039d982015-10-21 13:28:09 +0800712 // InTypes[] - this function uses the saved input types in ExpandInputsLoopInvariant()
713 // to convert the pointer of byte InPtr to its real type.
David Grosse32af522016-01-15 12:15:48 -0800714 // InBufPtrs[] - this function consumes the information produced by ExpandInputsLoopInvariant()
715 // InStructTempSlots[] - this function consumes the information produced by ExpandInputsLoopInvariant()
716 // IndVar - value of loop induction variable (X coordinate) for a given loop iteration
717 //
718 // RootArgs - this function sets this to the list of outgoing argument values corresponding
719 // to the inputs
720 void ExpandInputsBody(llvm::IRBuilder<> &Builder,
721 llvm::Value *Arg_x1,
722 llvm::MDNode *TBAAAllocation,
723 const size_t NumInputs,
Yong Chenf039d982015-10-21 13:28:09 +0800724 const llvm::SmallVectorImpl<llvm::Type *> &InTypes,
David Grosse32af522016-01-15 12:15:48 -0800725 const llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
726 const llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots,
727 llvm::Value *IndVar,
728 llvm::SmallVectorImpl<llvm::Value *> &RootArgs) {
729 llvm::Value *Offset = Builder.CreateSub(IndVar, Arg_x1);
Yong Chenf039d982015-10-21 13:28:09 +0800730 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
David Grosse32af522016-01-15 12:15:48 -0800731
732 for (size_t Index = 0; Index < NumInputs; ++Index) {
David Grosse32af522016-01-15 12:15:48 -0800733
Yong Chenf039d982015-10-21 13:28:09 +0800734 llvm::Value *InPtr = nullptr;
David Gross97e50992017-03-29 20:52:30 +0000735 if (mStructExplicitlyPaddedBySlang || (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING)) {
Yong Chenf039d982015-10-21 13:28:09 +0800736 InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset);
737 } else {
738 // Treat x86 input buffer as byte[], get indexed pointer with explicit
739 // byte offset computed using a datalayout based on
740 // X86_CUSTOM_DL_STRING, then bitcast it to actual input type.
741 llvm::DataLayout DL(X86_CUSTOM_DL_STRING);
742 llvm::Type *InTy = InTypes[Index];
743 uint64_t InStep = DL.getTypeAllocSize(InTy->getPointerElementType());
744 llvm::Value *OffsetInBytes = Builder.CreateMul(Offset, llvm::ConstantInt::get(Int32Ty, InStep));
745 InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], OffsetInBytes);
746 InPtr = Builder.CreatePointerCast(InPtr, InTy);
747 }
748
749 llvm::Value *Input;
David Grosse32af522016-01-15 12:15:48 -0800750 llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
751
752 if (gEnableRsTbaa) {
753 InputLoad->setMetadata("tbaa", TBAAAllocation);
754 }
755
756 if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) {
757 // Pass a pointer to a temporary on the stack, rather than
758 // passing a pointer to the original value. We do not want
759 // the kernel to potentially modify the input data.
760
761 // Note: don't annotate with TBAA, since the kernel might
762 // have its own TBAA annotations for the pointer argument.
763 Builder.CreateStore(InputLoad, TemporarySlot);
764 Input = TemporarySlot;
765 } else {
766 Input = InputLoad;
767 }
768
769 RootArgs.push_back(Input);
770 }
771 }
772
Tobias Grosser8ae46072013-06-20 14:00:31 -0700773 /* Performs the actual optimization on a selected function. On success, the
774 * Module will contain a new function of the name "<NAME>.expand" that
775 * invokes <NAME>() in a loop with the appropriate parameters.
776 */
Matt Wala4e7a5062015-07-30 16:27:51 -0700777 bool ExpandOldStyleForEach(llvm::Function *Function, uint32_t Signature) {
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700778 ALOGV("Expanding ForEach-able Function %s",
779 Function->getName().str().c_str());
Tobias Grosser8ae46072013-06-20 14:00:31 -0700780
781 if (!Signature) {
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700782 Signature = getRootSignature(Function);
Tobias Grosser8ae46072013-06-20 14:00:31 -0700783 if (!Signature) {
784 // We couldn't determine how to expand this function based on its
785 // function signature.
786 return false;
787 }
788 }
789
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700790 llvm::DataLayout DL(Module);
David Gross97e50992017-03-29 20:52:30 +0000791 if (!mStructExplicitlyPaddedBySlang && (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING)) {
Yong Chenf039d982015-10-21 13:28:09 +0800792 DL.reset(X86_CUSTOM_DL_STRING);
793 }
Tobias Grosser8ae46072013-06-20 14:00:31 -0700794
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700795 llvm::Function *ExpandedFunction =
Matt Wala4e7a5062015-07-30 16:27:51 -0700796 createEmptyExpandedForEachKernel(Function->getName());
Stephen Hinesdb169182012-01-05 18:46:36 -0800797
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700798 /*
799 * Extract the expanded function's parameters. It is guaranteed by
David Grosse32af522016-01-15 12:15:48 -0800800 * createEmptyExpandedForEachKernel that there will be four parameters.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700801 */
David Gross33cda5c2015-01-30 11:41:19 -0800802
Matt Wala4e7a5062015-07-30 16:27:51 -0700803 bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
David Gross33cda5c2015-01-30 11:41:19 -0800804
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700805 llvm::Function::arg_iterator ExpandedFunctionArgIter =
806 ExpandedFunction->arg_begin();
Stephen Hinesdb169182012-01-05 18:46:36 -0800807
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700808 llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++);
809 llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++);
810 llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++);
Chris Wailes5010f642014-07-25 15:31:32 -0700811 llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700812
Chris Wailes900c6c12014-08-13 15:40:00 -0700813 llvm::Value *InStep = nullptr;
814 llvm::Value *OutStep = nullptr;
Stephen Hinesdb169182012-01-05 18:46:36 -0800815
816 // Construct the actual function body.
Pirama Arumuga Nainarf229c402016-03-06 23:05:45 -0800817 llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin());
Stephen Hinesdb169182012-01-05 18:46:36 -0800818
Stephen Hinescc366e52012-02-21 17:22:04 -0800819 // Collect and construct the arguments for the kernel().
Stephen Hinesdb169182012-01-05 18:46:36 -0800820 // Note that we load any loop-invariant arguments before entering the Loop.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700821 llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
Stephen Hinesdb169182012-01-05 18:46:36 -0800822
Chris Wailes900c6c12014-08-13 15:40:00 -0700823 llvm::Type *InTy = nullptr;
Matt Wala083ef3c2015-07-22 18:58:05 -0700824 llvm::Value *InBufPtr = nullptr;
Stephen Hinesd8817752013-08-02 17:56:51 -0700825 if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
Matt Wala083ef3c2015-07-22 18:58:05 -0700826 SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 0}));
827 llvm::LoadInst *InStepArg = Builder.CreateLoad(
828 Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"), "instep_addr");
Chris Wailese10b8642014-07-15 13:18:45 -0700829
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700830 InTy = (FunctionArgIter++)->getType();
Chris Wailese10b8642014-07-15 13:18:45 -0700831 InStep = getStepValue(&DL, InTy, InStepArg);
832
Stephen Hines2b040862012-07-27 20:18:08 -0700833 InStep->setName("instep");
Chris Wailese10b8642014-07-15 13:18:45 -0700834
Matt Wala083ef3c2015-07-22 18:58:05 -0700835 SmallGEPIndices InputAddrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 0}));
836 InBufPtr = Builder.CreateLoad(
837 Builder.CreateInBoundsGEP(Arg_p, InputAddrGEP, "input_buf.gep"), "input_buf");
Stephen Hinesdb169182012-01-05 18:46:36 -0800838 }
839
Chris Wailes900c6c12014-08-13 15:40:00 -0700840 llvm::Type *OutTy = nullptr;
841 llvm::Value *OutBasePtr = nullptr;
Stephen Hinesd8817752013-08-02 17:56:51 -0700842 if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700843 OutTy = (FunctionArgIter++)->getType();
Stephen Hinesb730e232013-01-09 15:31:36 -0800844 OutStep = getStepValue(&DL, OutTy, Arg_outstep);
Stephen Hines2b040862012-07-27 20:18:08 -0700845 OutStep->setName("outstep");
Matt Wala083ef3c2015-07-22 18:58:05 -0700846 SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
847 OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
Stephen Hinesdb169182012-01-05 18:46:36 -0800848 }
849
Chris Wailes900c6c12014-08-13 15:40:00 -0700850 llvm::Value *UsrData = nullptr;
Stephen Hinesd8817752013-08-02 17:56:51 -0700851 if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700852 llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
Matt Wala083ef3c2015-07-22 18:58:05 -0700853 llvm::Value *UsrDataPointerAddr = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldUsr);
854 UsrData = Builder.CreatePointerCast(Builder.CreateLoad(UsrDataPointerAddr), UsrDataTy);
Stephen Hinesdb169182012-01-05 18:46:36 -0800855 UsrData->setName("UsrData");
Stephen Hinesdb169182012-01-05 18:46:36 -0800856 }
857
Matt Wala083ef3c2015-07-22 18:58:05 -0700858 llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
Dean De Leo4165d292015-11-25 12:55:21 +0000859 llvm::Value *IV;
Tobias Grossere4a73f62013-06-21 15:35:03 -0700860 createLoop(Builder, Arg_x1, Arg_x2, &IV);
Stephen Hinesdb169182012-01-05 18:46:36 -0800861
David Gross33cda5c2015-01-30 11:41:19 -0800862 llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
David Gross28c17992015-07-07 16:44:33 -0700863 const int CalleeArgsContextIdx = ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
Matt Wala083ef3c2015-07-22 18:58:05 -0700864 [&FunctionArgIter]() { FunctionArgIter++; },
865 LoopHeader->getTerminator());
David Gross33cda5c2015-01-30 11:41:19 -0800866
867 bccAssert(FunctionArgIter == Function->arg_end());
868
Stephen Hinescc366e52012-02-21 17:22:04 -0800869 // Populate the actual call to kernel().
Stephen Hinesdb169182012-01-05 18:46:36 -0800870 llvm::SmallVector<llvm::Value*, 8> RootArgs;
871
Chris Wailes900c6c12014-08-13 15:40:00 -0700872 llvm::Value *InPtr = nullptr;
873 llvm::Value *OutPtr = nullptr;
Stephen Hinesdb169182012-01-05 18:46:36 -0800874
Tobias Grosserae937ec2013-06-27 13:49:47 -0700875 // Calculate the current input and output pointers
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700876 //
Tobias Grosserae937ec2013-06-27 13:49:47 -0700877 // We always calculate the input/output pointers with a GEP operating on i8
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700878 // values and only cast at the very end to OutTy. This is because the step
879 // between two values is given in bytes.
880 //
881 // TODO: We could further optimize the output by using a GEP operation of
882 // type 'OutTy' in cases where the element type of the allocation allows.
883 if (OutBasePtr) {
884 llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
885 OutOffset = Builder.CreateMul(OutOffset, OutStep);
Matt Wala083ef3c2015-07-22 18:58:05 -0700886 OutPtr = Builder.CreateInBoundsGEP(OutBasePtr, OutOffset);
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700887 OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
888 }
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700889
Matt Wala083ef3c2015-07-22 18:58:05 -0700890 if (InBufPtr) {
Tobias Grosserae937ec2013-06-27 13:49:47 -0700891 llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
892 InOffset = Builder.CreateMul(InOffset, InStep);
Matt Wala083ef3c2015-07-22 18:58:05 -0700893 InPtr = Builder.CreateInBoundsGEP(InBufPtr, InOffset);
Tobias Grosserae937ec2013-06-27 13:49:47 -0700894 InPtr = Builder.CreatePointerCast(InPtr, InTy);
895 }
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700896
Tobias Grosserae937ec2013-06-27 13:49:47 -0700897 if (InPtr) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700898 RootArgs.push_back(InPtr);
Stephen Hinesdb169182012-01-05 18:46:36 -0800899 }
900
Tobias Grosser02f3cd62013-06-27 10:59:10 -0700901 if (OutPtr) {
Stephen Hines7ae3a822012-09-14 19:24:58 -0700902 RootArgs.push_back(OutPtr);
Stephen Hinesdb169182012-01-05 18:46:36 -0800903 }
904
905 if (UsrData) {
906 RootArgs.push_back(UsrData);
907 }
908
David Gross28c17992015-07-07 16:44:33 -0700909 finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
Stephen Hinesdb169182012-01-05 18:46:36 -0800910
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700911 Builder.CreateCall(Function, RootArgs);
Stephen Hinesdb169182012-01-05 18:46:36 -0800912
Stephen Hines7ae3a822012-09-14 19:24:58 -0700913 return true;
914 }
915
Matt Wala4e7a5062015-07-30 16:27:51 -0700916 /* Expand a pass-by-value foreach kernel.
Stephen Hines7ae3a822012-09-14 19:24:58 -0700917 */
Matt Wala4e7a5062015-07-30 16:27:51 -0700918 bool ExpandForEach(llvm::Function *Function, uint32_t Signature) {
Stephen Hinesd8817752013-08-02 17:56:51 -0700919 bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700920 ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
Stephen Hines7ae3a822012-09-14 19:24:58 -0700921
Matt Wala4e7a5062015-07-30 16:27:51 -0700922 // TODO: Refactor this to share functionality with ExpandOldStyleForEach.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700923 llvm::DataLayout DL(Module);
David Gross97e50992017-03-29 20:52:30 +0000924 if (!mStructExplicitlyPaddedBySlang && (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING)) {
Yong Chenf039d982015-10-21 13:28:09 +0800925 DL.reset(X86_CUSTOM_DL_STRING);
926 }
927 llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
Stephen Hines7ae3a822012-09-14 19:24:58 -0700928
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700929 llvm::Function *ExpandedFunction =
Matt Wala4e7a5062015-07-30 16:27:51 -0700930 createEmptyExpandedForEachKernel(Function->getName());
Stephen Hines7ae3a822012-09-14 19:24:58 -0700931
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700932 /*
933 * Extract the expanded function's parameters. It is guaranteed by
David Grosse32af522016-01-15 12:15:48 -0800934 * createEmptyExpandedForEachKernel that there will be four parameters.
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700935 */
Chris Wailes881cda42014-06-23 11:27:41 -0700936
Matt Wala4e7a5062015-07-30 16:27:51 -0700937 bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
Chris Wailes881cda42014-06-23 11:27:41 -0700938
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700939 llvm::Function::arg_iterator ExpandedFunctionArgIter =
940 ExpandedFunction->arg_begin();
941
942 llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++);
943 llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++);
944 llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++);
Matt Wala3bc475b2015-08-12 17:56:19 -0700945 // Arg_outstep is not used by expanded new-style forEach kernels.
Stephen Hines7ae3a822012-09-14 19:24:58 -0700946
Stephen Hines7ae3a822012-09-14 19:24:58 -0700947 // Construct the actual function body.
Pirama Arumuga Nainarf229c402016-03-06 23:05:45 -0800948 llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin());
Stephen Hines7ae3a822012-09-14 19:24:58 -0700949
Tobias Grosser18a38a32013-07-26 15:03:03 -0700950 // Create TBAA meta-data.
Stephen Hines354d1c12015-04-03 22:54:54 -0700951 llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
952 *TBAAAllocation, *TBAAPointer;
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700953 llvm::MDBuilder MDHelper(*Context);
Logan Chien14588cf2014-02-20 12:35:51 +0800954
Stephen Hines354d1c12015-04-03 22:54:54 -0700955 TBAARenderScriptDistinct =
Matt Wala4e7a5062015-07-30 16:27:51 -0700956 MDHelper.createTBAARoot(kRenderScriptTBAARootName);
957 TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
Stephen Hines354d1c12015-04-03 22:54:54 -0700958 TBAARenderScriptDistinct);
Chris Wailese10b8642014-07-15 13:18:45 -0700959 TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
960 TBAARenderScript);
961 TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
962 TBAAAllocation, 0);
963 TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
964 TBAARenderScript);
Logan Chien14588cf2014-02-20 12:35:51 +0800965 TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
Tobias Grosser18a38a32013-07-26 15:03:03 -0700966
Chris Wailes881cda42014-06-23 11:27:41 -0700967 /*
968 * Collect and construct the arguments for the kernel().
969 *
970 * Note that we load any loop-invariant arguments before entering the Loop.
971 */
Matt Wala083ef3c2015-07-22 18:58:05 -0700972 size_t NumRemainingInputs = Function->arg_size();
Stephen Hines7ae3a822012-09-14 19:24:58 -0700973
Chris Wailes881cda42014-06-23 11:27:41 -0700974 // No usrData parameter on kernels.
975 bccAssert(
976 !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
977
978 llvm::Function::arg_iterator ArgIter = Function->arg_begin();
979
980 // Check the return type
Tim Murraybb73b742014-11-04 11:20:10 -0800981 llvm::Type *OutTy = nullptr;
Tim Murraybb73b742014-11-04 11:20:10 -0800982 llvm::LoadInst *OutBasePtr = nullptr;
983 llvm::Value *CastedOutBasePtr = nullptr;
Chris Wailes881cda42014-06-23 11:27:41 -0700984
Chris Wailese10b8642014-07-15 13:18:45 -0700985 bool PassOutByPointer = false;
Chris Wailes881cda42014-06-23 11:27:41 -0700986
Stephen Hinesd8817752013-08-02 17:56:51 -0700987 if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
Chris Wailesbdbff6e2014-06-13 13:47:19 -0700988 llvm::Type *OutBaseTy = Function->getReturnType();
Chris Wailes881cda42014-06-23 11:27:41 -0700989
Stephen Hines74a4b082012-09-21 19:26:48 -0700990 if (OutBaseTy->isVoidTy()) {
Chris Wailese10b8642014-07-15 13:18:45 -0700991 PassOutByPointer = true;
Chris Wailes881cda42014-06-23 11:27:41 -0700992 OutTy = ArgIter->getType();
993
994 ArgIter++;
Matt Wala083ef3c2015-07-22 18:58:05 -0700995 --NumRemainingInputs;
Stephen Hines74a4b082012-09-21 19:26:48 -0700996 } else {
Stephen Hines74a4b082012-09-21 19:26:48 -0700997 // We don't increment Args, since we are using the actual return type.
Chris Wailes881cda42014-06-23 11:27:41 -0700998 OutTy = OutBaseTy->getPointerTo();
Stephen Hines74a4b082012-09-21 19:26:48 -0700999 }
Chris Wailes881cda42014-06-23 11:27:41 -07001000
Matt Wala083ef3c2015-07-22 18:58:05 -07001001 SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
1002 OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
Chris Wailes097ca142014-07-08 15:57:12 -07001003
Stephen Hines9c5263e2014-02-11 15:58:48 -08001004 if (gEnableRsTbaa) {
1005 OutBasePtr->setMetadata("tbaa", TBAAPointer);
1006 }
Tim Murray50f5eb42014-12-09 17:36:24 -08001007
David Gross97e50992017-03-29 20:52:30 +00001008 if (mStructExplicitlyPaddedBySlang || (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING)) {
Yong Chenf039d982015-10-21 13:28:09 +08001009 CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
1010 } else {
1011 // The disagreement between module and x86 target machine datalayout
1012 // causes mismatched input/output data offset between slang reflected
1013 // code and bcc codegen for GetElementPtr. To solve this issue, skip the
1014 // cast to OutTy and leave CastedOutBasePtr as an int8_t*. The buffer
1015 // is later indexed with an explicit byte offset computed based on
David Gross97e50992017-03-29 20:52:30 +00001016 // X86_CUSTOM_DL_STRING and then bitcast to actual output type.
Yong Chenf039d982015-10-21 13:28:09 +08001017 CastedOutBasePtr = OutBasePtr;
1018 }
Stephen Hines74a4b082012-09-21 19:26:48 -07001019 }
1020
Yong Chenf039d982015-10-21 13:28:09 +08001021 llvm::SmallVector<llvm::Type*, 8> InTypes;
Matt Wala083ef3c2015-07-22 18:58:05 -07001022 llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
1023 llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
1024
1025 bccAssert(NumRemainingInputs <= RS_KERNEL_INPUT_LIMIT);
1026
1027 // Create the loop structure.
1028 llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
Dean De Leo4165d292015-11-25 12:55:21 +00001029 llvm::Value *IV;
David Gross33cda5c2015-01-30 11:41:19 -08001030 createLoop(Builder, Arg_x1, Arg_x2, &IV);
1031
1032 llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
Matt Wala083ef3c2015-07-22 18:58:05 -07001033 const int CalleeArgsContextIdx =
1034 ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
1035 [&NumRemainingInputs]() { --NumRemainingInputs; },
1036 LoopHeader->getTerminator());
David Gross33cda5c2015-01-30 11:41:19 -08001037
Matt Wala083ef3c2015-07-22 18:58:05 -07001038 // After ExpandSpecialArguments() gets called, NumRemainingInputs
1039 // counts the number of arguments to the kernel that correspond to
1040 // an array entry from the InPtr field of the DriverInfo
1041 // structure.
1042 const size_t NumInPtrArguments = NumRemainingInputs;
Chris Wailes881cda42014-06-23 11:27:41 -07001043
Matt Wala083ef3c2015-07-22 18:58:05 -07001044 if (NumInPtrArguments > 0) {
David Grosse32af522016-01-15 12:15:48 -08001045 ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, ArgIter, NumInPtrArguments,
Yong Chenf039d982015-10-21 13:28:09 +08001046 InTypes, InBufPtrs, InStructTempSlots);
Stephen Hines7ae3a822012-09-14 19:24:58 -07001047 }
1048
Stephen Hines7ae3a822012-09-14 19:24:58 -07001049 // Populate the actual call to kernel().
1050 llvm::SmallVector<llvm::Value*, 8> RootArgs;
1051
Matt Wala9296edc2015-08-05 16:32:42 -07001052 // Calculate the current input and output pointers.
Chris Wailes881cda42014-06-23 11:27:41 -07001053
1054 // Output
1055
Chris Wailes900c6c12014-08-13 15:40:00 -07001056 llvm::Value *OutPtr = nullptr;
Tim Murraybb73b742014-11-04 11:20:10 -08001057 if (CastedOutBasePtr) {
Tobias Grosser7b662902013-06-21 17:07:39 -07001058 llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
Yong Chenf039d982015-10-21 13:28:09 +08001059
David Gross97e50992017-03-29 20:52:30 +00001060 if (mStructExplicitlyPaddedBySlang || (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING)) {
Yong Chenf039d982015-10-21 13:28:09 +08001061 OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset);
1062 } else {
1063 // Treat x86 output buffer as byte[], get indexed pointer with explicit
1064 // byte offset computed using a datalayout based on
1065 // X86_CUSTOM_DL_STRING, then bitcast it to actual output type.
1066 uint64_t OutStep = DL.getTypeAllocSize(OutTy->getPointerElementType());
1067 llvm::Value *OutOffsetInBytes = Builder.CreateMul(OutOffset, llvm::ConstantInt::get(Int32Ty, OutStep));
1068 OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffsetInBytes);
1069 OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
1070 }
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001071
Chris Wailese10b8642014-07-15 13:18:45 -07001072 if (PassOutByPointer) {
Chris Wailes881cda42014-06-23 11:27:41 -07001073 RootArgs.push_back(OutPtr);
Stephen Hines9c5263e2014-02-11 15:58:48 -08001074 }
Chris Wailes881cda42014-06-23 11:27:41 -07001075 }
1076
1077 // Inputs
1078
Matt Wala083ef3c2015-07-22 18:58:05 -07001079 if (NumInPtrArguments > 0) {
David Grosse32af522016-01-15 12:15:48 -08001080 ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInPtrArguments,
Yong Chenf039d982015-10-21 13:28:09 +08001081 InTypes, InBufPtrs, InStructTempSlots, IV, RootArgs);
Stephen Hines7ae3a822012-09-14 19:24:58 -07001082 }
1083
David Gross28c17992015-07-07 16:44:33 -07001084 finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
Stephen Hines7ae3a822012-09-14 19:24:58 -07001085
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001086 llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
Stephen Hines7ae3a822012-09-14 19:24:58 -07001087
Chris Wailese10b8642014-07-15 13:18:45 -07001088 if (OutPtr && !PassOutByPointer) {
Matt Wala9296edc2015-08-05 16:32:42 -07001089 RetVal->setName("call.result");
Tobias Grosser18a38a32013-07-26 15:03:03 -07001090 llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
Stephen Hines9c5263e2014-02-11 15:58:48 -08001091 if (gEnableRsTbaa) {
1092 Store->setMetadata("tbaa", TBAAAllocation);
1093 }
Stephen Hines7ae3a822012-09-14 19:24:58 -07001094 }
1095
Stephen Hinesdb169182012-01-05 18:46:36 -08001096 return true;
1097 }
1098
David Grosse32af522016-01-15 12:15:48 -08001099 // Certain categories of functions that make up a general
1100 // reduce-style kernel are called directly from the driver with no
1101 // expansion needed. For a function in such a category, we need to
1102 // promote linkage from static to external, to ensure that the
1103 // function is visible to the driver in the dynamic symbol table.
1104 // This promotion is safe because we don't have any kind of cross
1105 // translation unit linkage model (except for linking against
1106 // RenderScript libraries), so we do not risk name clashes.
David Gross9fa4d442016-06-02 14:46:55 -07001107 bool PromoteReduceFunction(const char *Name, FunctionSet &PromotedFunctions) {
David Grosse32af522016-01-15 12:15:48 -08001108 if (!Name) // a presumably-optional function that is not present
1109 return false;
1110
1111 llvm::Function *Fn = Module->getFunction(Name);
1112 bccAssert(Fn != nullptr);
1113 if (PromotedFunctions.insert(Fn).second) {
1114 bccAssert(Fn->getLinkage() == llvm::GlobalValue::InternalLinkage);
1115 Fn->setLinkage(llvm::GlobalValue::ExternalLinkage);
1116 return true;
1117 }
1118
1119 return false;
1120 }
1121
1122 // Expand the accumulator function for a general reduce-style kernel.
1123 //
1124 // The input is a function of the form
1125 //
1126 // define void @func(accumType* %accum, foo1 in1[, ... fooN inN] [, special arguments])
1127 //
1128 // where all arguments except the first are the same as for a foreach kernel.
1129 //
1130 // The input accumulator function gets expanded into a function of the form
1131 //
1132 // define void @func.expand(%RsExpandKernelDriverInfoPfx* %p, i32 %x1, i32 %x2, accumType* %accum)
1133 //
1134 // which performs a serial accumulation of elements [x1, x2) into *%accum.
1135 //
1136 // In pseudocode, @func.expand does:
1137 //
1138 // for (i = %x1; i < %x2; ++i) {
1139 // func(%accum,
1140 // *((foo1 *)p->inPtr[0] + i)[, ... *((fooN *)p->inPtr[N-1] + i)
1141 // [, p] [, i] [, p->current.y] [, p->current.z]);
1142 // }
1143 //
1144 // This is very similar to foreach kernel expansion with no output.
David Gross9fa4d442016-06-02 14:46:55 -07001145 bool ExpandReduceAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) {
David Grosse32af522016-01-15 12:15:48 -08001146 ALOGV("Expanding accumulator %s for general reduce kernel",
1147 FnAccumulator->getName().str().c_str());
1148
1149 // Create TBAA meta-data.
1150 llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
1151 *TBAAAllocation, *TBAAPointer;
1152 llvm::MDBuilder MDHelper(*Context);
1153 TBAARenderScriptDistinct =
1154 MDHelper.createTBAARoot(kRenderScriptTBAARootName);
1155 TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
1156 TBAARenderScriptDistinct);
1157 TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
1158 TBAARenderScript);
1159 TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
1160 TBAAAllocation, 0);
1161 TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
1162 TBAARenderScript);
1163 TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
1164
1165 auto AccumulatorArgIter = FnAccumulator->arg_begin();
1166
1167 // Create empty accumulator function.
1168 llvm::Function *FnExpandedAccumulator =
David Gross9fa4d442016-06-02 14:46:55 -07001169 createEmptyExpandedReduceAccumulator(FnAccumulator->getName(),
1170 (AccumulatorArgIter++)->getType());
David Grosse32af522016-01-15 12:15:48 -08001171
1172 // Extract the expanded accumulator's parameters. It is
David Gross9fa4d442016-06-02 14:46:55 -07001173 // guaranteed by createEmptyExpandedReduceAccumulator that
David Grosse32af522016-01-15 12:15:48 -08001174 // there will be 4 parameters.
David Gross9fa4d442016-06-02 14:46:55 -07001175 bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams);
David Grosse32af522016-01-15 12:15:48 -08001176 auto ExpandedAccumulatorArgIter = FnExpandedAccumulator->arg_begin();
1177 llvm::Value *Arg_p = &*(ExpandedAccumulatorArgIter++);
1178 llvm::Value *Arg_x1 = &*(ExpandedAccumulatorArgIter++);
1179 llvm::Value *Arg_x2 = &*(ExpandedAccumulatorArgIter++);
1180 llvm::Value *Arg_accum = &*(ExpandedAccumulatorArgIter++);
1181
1182 // Construct the actual function body.
Pirama Arumuga Nainarf229c402016-03-06 23:05:45 -08001183 llvm::IRBuilder<> Builder(&*FnExpandedAccumulator->getEntryBlock().begin());
David Grosse32af522016-01-15 12:15:48 -08001184
1185 // Create the loop structure.
1186 llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
Dean De Leo4165d292015-11-25 12:55:21 +00001187 llvm::Value *IndVar;
David Grosse32af522016-01-15 12:15:48 -08001188 createLoop(Builder, Arg_x1, Arg_x2, &IndVar);
1189
1190 llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
1191 const int CalleeArgsContextIdx =
1192 ExpandSpecialArguments(Signature, IndVar, Arg_p, Builder, CalleeArgs,
1193 [](){}, LoopHeader->getTerminator());
1194
Yong Chenf039d982015-10-21 13:28:09 +08001195 llvm::SmallVector<llvm::Type*, 8> InTypes;
David Grosse32af522016-01-15 12:15:48 -08001196 llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
1197 llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
1198 ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, AccumulatorArgIter, NumInputs,
Yong Chenf039d982015-10-21 13:28:09 +08001199 InTypes, InBufPtrs, InStructTempSlots);
David Grosse32af522016-01-15 12:15:48 -08001200
1201 // Populate the actual call to the original accumulator.
1202 llvm::SmallVector<llvm::Value*, 8> RootArgs;
1203 RootArgs.push_back(Arg_accum);
Yong Chenf039d982015-10-21 13:28:09 +08001204 ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInputs, InTypes, InBufPtrs, InStructTempSlots,
David Grosse32af522016-01-15 12:15:48 -08001205 IndVar, RootArgs);
1206 finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *FnAccumulator, Builder);
1207 Builder.CreateCall(FnAccumulator, RootArgs);
1208
1209 return true;
1210 }
1211
David Grossdd33eb82016-04-08 12:35:41 -07001212 // Create a combiner function for a general reduce-style kernel that lacks one,
1213 // by calling the accumulator function.
1214 //
1215 // The accumulator function must be of the form
1216 //
1217 // define void @accumFn(accumType* %accum, accumType %in)
1218 //
1219 // A combiner function will be generated of the form
1220 //
1221 // define void @accumFn.combiner(accumType* %accum, accumType* %other) {
1222 // %1 = load accumType, accumType* %other
1223 // call void @accumFn(accumType* %accum, accumType %1);
1224 // }
David Gross9fa4d442016-06-02 14:46:55 -07001225 bool CreateReduceCombinerFromAccumulator(llvm::Function *FnAccumulator) {
David Grossdd33eb82016-04-08 12:35:41 -07001226 ALOGV("Creating combiner from accumulator %s for general reduce kernel",
1227 FnAccumulator->getName().str().c_str());
1228
1229 using llvm::Attribute;
1230
1231 bccAssert(FnAccumulator->arg_size() == 2);
1232 auto AccumulatorArgIter = FnAccumulator->arg_begin();
1233 llvm::Value *AccumulatorArg_accum = &*(AccumulatorArgIter++);
1234 llvm::Value *AccumulatorArg_in = &*(AccumulatorArgIter++);
1235 llvm::Type *AccumulatorArgType = AccumulatorArg_accum->getType();
1236 bccAssert(AccumulatorArgType->isPointerTy());
1237
1238 llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
1239 llvm::FunctionType *CombinerType =
1240 llvm::FunctionType::get(VoidTy, { AccumulatorArgType, AccumulatorArgType }, false);
1241 llvm::Function *FnCombiner =
1242 llvm::Function::Create(CombinerType, llvm::GlobalValue::ExternalLinkage,
David Gross9fa4d442016-06-02 14:46:55 -07001243 nameReduceCombinerFromAccumulator(FnAccumulator->getName()),
David Grossdd33eb82016-04-08 12:35:41 -07001244 Module);
1245
1246 auto CombinerArgIter = FnCombiner->arg_begin();
1247
1248 llvm::Argument *CombinerArg_accum = &(*CombinerArgIter++);
1249 CombinerArg_accum->setName("accum");
1250 CombinerArg_accum->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_accum->getArgNo() + 1,
1251 llvm::makeArrayRef(Attribute::NoCapture)));
1252
1253 llvm::Argument *CombinerArg_other = &(*CombinerArgIter++);
1254 CombinerArg_other->setName("other");
1255 CombinerArg_other->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_other->getArgNo() + 1,
1256 llvm::makeArrayRef(Attribute::NoCapture)));
1257
1258 llvm::BasicBlock *BB = llvm::BasicBlock::Create(*Context, "BB", FnCombiner);
1259 llvm::IRBuilder<> Builder(BB);
1260
1261 if (AccumulatorArg_in->getType()->isPointerTy()) {
1262 // Types of sufficient size get passed by pointer-to-copy rather
1263 // than passed by value. An accumulator cannot take a pointer
1264 // at the user level; so if we see a pointer here, we know that
1265 // we have a pass-by-pointer-to-copy case.
1266 llvm::Type *ElementType = AccumulatorArg_in->getType()->getPointerElementType();
1267 llvm::Value *TempMem = Builder.CreateAlloca(ElementType, nullptr, "caller_copy");
1268 Builder.CreateStore(Builder.CreateLoad(CombinerArg_other), TempMem);
1269 Builder.CreateCall(FnAccumulator, { CombinerArg_accum, TempMem });
1270 } else {
1271 llvm::Value *TypeAdjustedOther = CombinerArg_other;
1272 if (AccumulatorArgType->getPointerElementType() != AccumulatorArg_in->getType()) {
1273 // Call lowering by frontend has done some type coercion
1274 TypeAdjustedOther = Builder.CreatePointerCast(CombinerArg_other,
1275 AccumulatorArg_in->getType()->getPointerTo(),
1276 "cast");
1277 }
1278 llvm::Value *DerefOther = Builder.CreateLoad(TypeAdjustedOther);
1279 Builder.CreateCall(FnAccumulator, { CombinerArg_accum, DerefOther });
1280 }
1281 Builder.CreateRetVoid();
1282
1283 return true;
1284 }
1285
Tobias Grosser18a38a32013-07-26 15:03:03 -07001286 /// @brief Checks if pointers to allocation internals are exposed
1287 ///
1288 /// This function verifies if through the parameters passed to the kernel
1289 /// or through calls to the runtime library the script gains access to
1290 /// pointers pointing to data within a RenderScript Allocation.
1291 /// If we know we control all loads from and stores to data within
1292 /// RenderScript allocations and if we know the run-time internal accesses
1293 /// are all annotated with RenderScript TBAA metadata, only then we
1294 /// can safely use TBAA to distinguish between generic and from-allocation
1295 /// pointers.
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001296 bool allocPointersExposed(llvm::Module &Module) {
Tobias Grosser18a38a32013-07-26 15:03:03 -07001297 // Old style kernel function can expose pointers to elements within
1298 // allocations.
1299 // TODO: Extend analysis to allow simple cases of old-style kernels.
Stephen Hines25eb5862014-05-08 18:25:50 -07001300 for (size_t i = 0; i < mExportForEachCount; ++i) {
1301 const char *Name = mExportForEachNameList[i];
1302 uint32_t Signature = mExportForEachSignatureList[i];
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001303 if (Module.getFunction(Name) &&
Stephen Hinesd8817752013-08-02 17:56:51 -07001304 !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
Tobias Grosser18a38a32013-07-26 15:03:03 -07001305 return true;
1306 }
1307 }
1308
1309 // Check for library functions that expose a pointer to an Allocation or
1310 // that are not yet annotated with RenderScript-specific tbaa information.
Matt Walae2423782015-06-30 10:56:08 -07001311 static const std::vector<const char *> Funcs{
1312 // rsGetElementAt(...)
1313 "_Z14rsGetElementAt13rs_allocationj",
1314 "_Z14rsGetElementAt13rs_allocationjj",
1315 "_Z14rsGetElementAt13rs_allocationjjj",
Tobias Grosser18a38a32013-07-26 15:03:03 -07001316
Matt Walae2423782015-06-30 10:56:08 -07001317 // rsSetElementAt()
1318 "_Z14rsSetElementAt13rs_allocationPvj",
1319 "_Z14rsSetElementAt13rs_allocationPvjj",
1320 "_Z14rsSetElementAt13rs_allocationPvjjj",
Tobias Grosser18a38a32013-07-26 15:03:03 -07001321
Matt Walae2423782015-06-30 10:56:08 -07001322 // rsGetElementAtYuv_uchar_Y()
1323 "_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj",
1324
1325 // rsGetElementAtYuv_uchar_U()
1326 "_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj",
1327
1328 // rsGetElementAtYuv_uchar_V()
1329 "_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj",
1330 };
1331
1332 for (auto FI : Funcs) {
1333 llvm::Function *Function = Module.getFunction(FI);
Tobias Grosser18a38a32013-07-26 15:03:03 -07001334
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001335 if (!Function) {
Matt Walae2423782015-06-30 10:56:08 -07001336 ALOGE("Missing run-time function '%s'", FI);
Tobias Grosser18a38a32013-07-26 15:03:03 -07001337 return true;
1338 }
1339
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001340 if (Function->getNumUses() > 0) {
Tobias Grosser18a38a32013-07-26 15:03:03 -07001341 return true;
1342 }
1343 }
1344
1345 return false;
1346 }
1347
1348 /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
1349 ///
1350 /// The TBAA metadata used to annotate loads/stores from RenderScript
Chris Wailese10b8642014-07-15 13:18:45 -07001351 /// Allocations is generated in a separate TBAA tree with a
Stephen Hines354d1c12015-04-03 22:54:54 -07001352 /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for
1353 /// all nodes in unrelated alias analysis trees. This function makes the
1354 /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root),
Chris Wailese10b8642014-07-15 13:18:45 -07001355 /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With
1356 /// the connected trees every access to an Allocation is resolved to
1357 /// no-alias if compared to a normal C/C++ access.
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001358 void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
1359 llvm::MDBuilder MDHelper(*Context);
Stephen Hines354d1c12015-04-03 22:54:54 -07001360 llvm::MDNode *TBAARenderScriptDistinct =
1361 MDHelper.createTBAARoot("RenderScript Distinct TBAA");
1362 llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode(
1363 "RenderScript TBAA", TBAARenderScriptDistinct);
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001364 llvm::MDNode *TBAARoot = MDHelper.createTBAARoot("Simple C/C++ TBAA");
Stephen Hines354d1c12015-04-03 22:54:54 -07001365 TBAARenderScript->replaceOperandWith(1, TBAARoot);
Tobias Grosser18a38a32013-07-26 15:03:03 -07001366 }
1367
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001368 virtual bool runOnModule(llvm::Module &Module) {
1369 bool Changed = false;
1370 this->Module = &Module;
Matt Wala4e7a5062015-07-30 16:27:51 -07001371 Context = &Module.getContext();
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001372
Matt Wala4e7a5062015-07-30 16:27:51 -07001373 buildTypes();
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001374
1375 bcinfo::MetadataExtractor me(&Module);
Stephen Hines25eb5862014-05-08 18:25:50 -07001376 if (!me.extract()) {
1377 ALOGE("Could not extract metadata from module!");
1378 return false;
1379 }
Matt Wala4e7a5062015-07-30 16:27:51 -07001380
David Gross97e50992017-03-29 20:52:30 +00001381 mStructExplicitlyPaddedBySlang = (me.getCompilerVersion() >= SlangVersion::N_STRUCT_EXPLICIT_PADDING);
1382
Matt Wala4e7a5062015-07-30 16:27:51 -07001383 // Expand forEach_* style kernels.
Stephen Hines25eb5862014-05-08 18:25:50 -07001384 mExportForEachCount = me.getExportForEachSignatureCount();
1385 mExportForEachNameList = me.getExportForEachNameList();
1386 mExportForEachSignatureList = me.getExportForEachSignatureList();
Stephen Hinesdb169182012-01-05 18:46:36 -08001387
Stephen Hines25eb5862014-05-08 18:25:50 -07001388 for (size_t i = 0; i < mExportForEachCount; ++i) {
1389 const char *name = mExportForEachNameList[i];
1390 uint32_t signature = mExportForEachSignatureList[i];
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001391 llvm::Function *kernel = Module.getFunction(name);
Tobias Grossercd5b6572013-07-01 15:04:07 -07001392 if (kernel) {
Stephen Hinesd8817752013-08-02 17:56:51 -07001393 if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
Matt Wala4e7a5062015-07-30 16:27:51 -07001394 Changed |= ExpandForEach(kernel, signature);
Tobias Grosseracde6012013-07-02 14:28:01 -07001395 kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1396 } else if (kernel->getReturnType()->isVoidTy()) {
Matt Wala4e7a5062015-07-30 16:27:51 -07001397 Changed |= ExpandOldStyleForEach(kernel, signature);
Tobias Grosseracde6012013-07-02 14:28:01 -07001398 kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1399 } else {
1400 // There are some graphics root functions that are not
1401 // expanded, but that will be called directly. For those
1402 // functions, we can not set the linkage to internal.
1403 }
Stephen Hinescc366e52012-02-21 17:22:04 -08001404 }
Stephen Hinesdb169182012-01-05 18:46:36 -08001405 }
1406
David Grosse32af522016-01-15 12:15:48 -08001407 // Process general reduce_* style functions.
David Gross9fa4d442016-06-02 14:46:55 -07001408 const size_t ExportReduceCount = me.getExportReduceCount();
1409 const bcinfo::MetadataExtractor::Reduce *ExportReduceList = me.getExportReduceList();
David Grosse32af522016-01-15 12:15:48 -08001410 // Note that functions can be shared between kernels
David Grossdd33eb82016-04-08 12:35:41 -07001411 FunctionSet PromotedFunctions, ExpandedAccumulators, AccumulatorsForCombiners;
David Grosse32af522016-01-15 12:15:48 -08001412
David Gross9fa4d442016-06-02 14:46:55 -07001413 for (size_t i = 0; i < ExportReduceCount; ++i) {
1414 Changed |= PromoteReduceFunction(ExportReduceList[i].mInitializerName, PromotedFunctions);
1415 Changed |= PromoteReduceFunction(ExportReduceList[i].mCombinerName, PromotedFunctions);
1416 Changed |= PromoteReduceFunction(ExportReduceList[i].mOutConverterName, PromotedFunctions);
David Grosse32af522016-01-15 12:15:48 -08001417
1418 // Accumulator
David Gross9fa4d442016-06-02 14:46:55 -07001419 llvm::Function *accumulator = Module.getFunction(ExportReduceList[i].mAccumulatorName);
David Grosse32af522016-01-15 12:15:48 -08001420 bccAssert(accumulator != nullptr);
1421 if (ExpandedAccumulators.insert(accumulator).second)
David Gross9fa4d442016-06-02 14:46:55 -07001422 Changed |= ExpandReduceAccumulator(accumulator,
1423 ExportReduceList[i].mSignature,
1424 ExportReduceList[i].mInputCount);
1425 if (!ExportReduceList[i].mCombinerName) {
David Grossdd33eb82016-04-08 12:35:41 -07001426 if (AccumulatorsForCombiners.insert(accumulator).second)
David Gross9fa4d442016-06-02 14:46:55 -07001427 Changed |= CreateReduceCombinerFromAccumulator(accumulator);
David Grossdd33eb82016-04-08 12:35:41 -07001428 }
David Grosse32af522016-01-15 12:15:48 -08001429 }
1430
Matt Wala4e7a5062015-07-30 16:27:51 -07001431 if (gEnableRsTbaa && !allocPointersExposed(Module)) {
Chris Wailesbdbff6e2014-06-13 13:47:19 -07001432 connectRenderScriptTBAAMetadata(Module);
Tobias Grosser18a38a32013-07-26 15:03:03 -07001433 }
1434
Stephen Hinescc366e52012-02-21 17:22:04 -08001435 return Changed;
Stephen Hinesdb169182012-01-05 18:46:36 -08001436 }
1437
1438 virtual const char *getPassName() const {
Matt Wala4e7a5062015-07-30 16:27:51 -07001439 return "forEach_* and reduce_* function expansion";
Stephen Hinesdb169182012-01-05 18:46:36 -08001440 }
1441
Matt Wala4e7a5062015-07-30 16:27:51 -07001442}; // end RSKernelExpandPass
Stephen Hinesdb169182012-01-05 18:46:36 -08001443
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -07001444} // end anonymous namespace
1445
Matt Wala4e7a5062015-07-30 16:27:51 -07001446char RSKernelExpandPass::ID = 0;
1447static llvm::RegisterPass<RSKernelExpandPass> X("kernelexp", "Kernel Expand Pass");
Stephen Hinesdb169182012-01-05 18:46:36 -08001448
1449namespace bcc {
1450
Dean De Leo4165d292015-11-25 12:55:21 +00001451const char BCC_INDEX_VAR_NAME[] = "rsIndex";
1452
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -07001453llvm::ModulePass *
Matt Wala4e7a5062015-07-30 16:27:51 -07001454createRSKernelExpandPass(bool pEnableStepOpt) {
1455 return new RSKernelExpandPass(pEnableStepOpt);
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -07001456}
Stephen Hinesdb169182012-01-05 18:46:36 -08001457
Shih-wei Liao7a66e6c2012-04-25 04:04:15 -07001458} // end namespace bcc