Support for general reduction kernels.
Bug: 23535724
Change-Id: I28b6b8fafab3fe9f5d09beb990791843875e1f26
diff --git a/lib/Renderscript/RSGlobalInfoPass.cpp b/lib/Renderscript/RSGlobalInfoPass.cpp
index 0600692..68d082a 100644
--- a/lib/Renderscript/RSGlobalInfoPass.cpp
+++ b/lib/Renderscript/RSGlobalInfoPass.cpp
@@ -138,6 +138,11 @@
continue;
}
+ // Skip LLVM intrinsic global variables (those whose names start with "llvm.").
+ if (GV.getName().startswith("llvm.")) {
+ continue;
+ }
+
// In LLVM, an instance of GlobalVariable is actually a Value
// corresponding to the address of it.
GVAddresses.push_back(llvm::ConstantExpr::getBitCast(&GV, VoidPtrTy));
diff --git a/lib/Renderscript/RSKernelExpand.cpp b/lib/Renderscript/RSKernelExpand.cpp
index 34611d7..d06cb5b 100644
--- a/lib/Renderscript/RSKernelExpand.cpp
+++ b/lib/Renderscript/RSKernelExpand.cpp
@@ -19,6 +19,7 @@
#include <cstdlib>
#include <functional>
+#include <unordered_set>
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
@@ -42,6 +43,7 @@
// Only used in bccAssert()
const int kNumExpandedForeachParams = 4;
const int kNumExpandedReduceParams = 3;
+const int kNumExpandedReduceNewAccumulatorParams = 4;
#endif
const char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA";
@@ -70,6 +72,8 @@
private:
static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h
+ typedef std::unordered_set<llvm::Function *> FunctionSet;
+
enum RsLaunchDimensionsField {
RsLaunchDimensionsFieldX,
RsLaunchDimensionsFieldY,
@@ -105,6 +109,7 @@
* the pass is run on.
*/
llvm::FunctionType *ExpandedForEachType, *ExpandedReduceType;
+ llvm::Type *RsExpandKernelDriverInfoPfxTy;
uint32_t mExportForEachCount;
const char **mExportForEachNameList;
@@ -294,7 +299,7 @@
RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions current
RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy); // const void *usr
RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t usrLen
- llvm::StructType *RsExpandKernelDriverInfoPfxTy =
+ RsExpandKernelDriverInfoPfxTy =
llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx");
// Create the function type for expanded kernels.
@@ -369,6 +374,55 @@
return ExpandedFunction;
}
+ // Create skeleton of a general reduce kernel's expanded accumulator.
+ // The new function is added to Module with external linkage, named OldName + ".expand".
+ // This creates a function with the following signature:
+ //
+ // void @func.expand(%RsExpandKernelDriverInfoPfx* nocapture %p,
+ // i32 %x1, i32 %x2, accumType* nocapture %accum)
+ // AccumArgTy is used as-is for %accum; the body is one block ("Begin") holding only "ret void".
+ llvm::Function *createEmptyExpandedReduceNewAccumulator(llvm::StringRef OldName,
+ llvm::Type *AccumArgTy) {
+ llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
+ llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
+ llvm::FunctionType *ExpandedReduceNewAccumulatorType =
+ llvm::FunctionType::get(VoidTy,
+ {RsExpandKernelDriverInfoPfxTy->getPointerTo(),
+ Int32Ty, Int32Ty, AccumArgTy}, false);
+ llvm::Function *FnExpandedAccumulator =
+ llvm::Function::Create(ExpandedReduceNewAccumulatorType,
+ llvm::GlobalValue::ExternalLinkage,
+ OldName + ".expand", Module);
+ bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceNewAccumulatorParams);
+
+ llvm::Function::arg_iterator AI = FnExpandedAccumulator->arg_begin();
+
+ using llvm::Attribute;
+ // NOTE(review): AttributeSet indices appear to be 1-based for parameters (0 = return value), hence getArgNo() + 1 -- confirm.
+ llvm::Argument *Arg_p = &(*AI++);
+ Arg_p->setName("p");
+ Arg_p->addAttr(llvm::AttributeSet::get(*Context, Arg_p->getArgNo() + 1,
+ llvm::makeArrayRef(Attribute::NoCapture)));
+
+ llvm::Argument *Arg_x1 = &(*AI++);
+ Arg_x1->setName("x1");
+
+ llvm::Argument *Arg_x2 = &(*AI++);
+ Arg_x2->setName("x2");
+
+ llvm::Argument *Arg_accum = &(*AI++);
+ Arg_accum->setName("accum");
+ Arg_accum->addAttr(llvm::AttributeSet::get(*Context, Arg_accum->getArgNo() + 1,
+ llvm::makeArrayRef(Attribute::NoCapture)));
+
+ llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
+ FnExpandedAccumulator);
+ llvm::IRBuilder<> Builder(Begin);
+ Builder.CreateRetVoid(); // placeholder body: a lone "ret void"
+
+ return FnExpandedAccumulator;
+ }
+
/// @brief Create an empty loop
///
/// Create a loop of the form:
@@ -504,12 +558,12 @@
}
// Build contribution to outgoing argument list for calling a
- // ForEach-able function, based on the special parameters of that
- // function.
+ // ForEach-able function or a general reduction accumulator
+ // function, based on the special parameters of that function.
//
- // Signature - metadata bits for the signature of the ForEach-able function
+ // Signature - metadata bits for the signature of the callee
// X, Arg_p - values derived directly from expanded function,
- // suitable for computing arguments for the ForEach-able function
+ // suitable for computing arguments for the callee
// CalleeArgs - contribution is accumulated here
// Bump - invoked once for each contributed outgoing argument
// LoopHeaderInsertionPoint - an Instruction in the loop header, before which
@@ -571,6 +625,126 @@
return Return;
}
+ // Generate loop-invariant input processing setup code for an expanded
+ // ForEach-able function or an expanded general reduction accumulator
+ // function.
+ // Builder - used to emit the setup code; its insertion point is saved and restored
+ // LoopHeader - block at the end of which the setup code will be inserted
+ // Arg_p - RSKernelDriverInfo pointer passed to the expanded function
+ // TBAAPointer - metadata for marking loads of pointer values out of RSKernelDriverInfo
+ // ArgIter - iterator pointing to first input of the UNexpanded function
+ // NumInputs - number of inputs (NOT number of ARGUMENTS)
+ // Both vectors below are appended to (one element per input), so element i matches input i only if they start empty.
+ // InBufPtrs[] - this function sets each array element to point to the first
+ // cell of the corresponding input allocation
+ // InStructTempSlots[] - this function sets each array element either to nullptr
+ // or to the result of an alloca (for the case where the
+ // calling convention dictates that a value must be passed
+ // by reference, and so we need a stacked temporary to hold
+ // a copy of that value)
+ void ExpandInputsLoopInvariant(llvm::IRBuilder<> &Builder, llvm::BasicBlock *LoopHeader,
+ llvm::Value *Arg_p,
+ llvm::MDNode *TBAAPointer,
+ llvm::Function::arg_iterator ArgIter,
+ const size_t NumInputs,
+ llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
+ llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots) {
+ bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT);
+
+ // Extract information about input slots. The work done
+ // here is loop-invariant, so we can hoist the operations out of the loop.
+ auto OldInsertionPoint = Builder.saveIP();
+ Builder.SetInsertPoint(LoopHeader->getTerminator());
+
+ for (size_t InputIndex = 0; InputIndex < NumInputs; ++InputIndex, ArgIter++) {
+ llvm::Type *InType = ArgIter->getType();
+
+ /*
+ * AArch64 calling conventions dictate that structs of sufficient size
+ * get passed by pointer instead of passed by value. This, combined
+ * with the fact that we don't allow kernels to operate on pointer
+ * data means that if we see a kernel with a pointer parameter we know
+ * that it is a struct input that has been promoted. As such we don't
+ * need to convert its type to a pointer. Later we will need to know
+ * to create a temporary copy on the stack, so we save this information
+ * in InStructTempSlots.
+ */
+ if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) {
+ llvm::Type *ElementType = PtrType->getElementType();
+ InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr,
+ "input_struct_slot"));
+ } else {
+ InType = InType->getPointerTo();
+ InStructTempSlots.push_back(nullptr);
+ }
+
+ SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr,
+ static_cast<int32_t>(InputIndex)}));
+ llvm::Value *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep");
+ llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf");
+ llvm::Value *CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in");
+
+ if (gEnableRsTbaa) {
+ InBufPtr->setMetadata("tbaa", TBAAPointer);
+ }
+
+ InBufPtrs.push_back(CastInBufPtr);
+ }
+
+ Builder.restoreIP(OldInsertionPoint);
+ }
+
+ // Generate loop-varying input processing code for an expanded ForEach-able function
+ // or an expanded general reduction accumulator function. Also, for the call to the
+ // UNexpanded function, collect the portion of the argument list corresponding to the
+ // inputs.
+ // Builder - code is emitted at its current insertion point
+ // Arg_x1 - first X coordinate to be processed by the expanded function
+ // TBAAAllocation - metadata for marking loads of input values out of allocations
+ // NumInputs - number of inputs (NOT number of ARGUMENTS)
+ // InBufPtrs[] - this function consumes the information produced by ExpandInputsLoopInvariant()
+ // InStructTempSlots[] - this function consumes the information produced by ExpandInputsLoopInvariant()
+ // IndVar - value of loop induction variable (X coordinate) for a given loop iteration
+ //
+ // RootArgs - this function appends to this the outgoing argument values corresponding
+ // to the inputs, in input order (existing elements are kept)
+ void ExpandInputsBody(llvm::IRBuilder<> &Builder,
+ llvm::Value *Arg_x1,
+ llvm::MDNode *TBAAAllocation,
+ const size_t NumInputs,
+ const llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
+ const llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots,
+ llvm::Value *IndVar,
+ llvm::SmallVectorImpl<llvm::Value *> &RootArgs) {
+ llvm::Value *Offset = Builder.CreateSub(IndVar, Arg_x1); // element index relative to Arg_x1
+
+ for (size_t Index = 0; Index < NumInputs; ++Index) {
+ llvm::Value *InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset);
+ llvm::Value *Input;
+
+ llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
+
+ if (gEnableRsTbaa) {
+ InputLoad->setMetadata("tbaa", TBAAAllocation);
+ }
+
+ if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) {
+ // Pass a pointer to a temporary on the stack, rather than
+ // passing a pointer to the original value. We do not want
+ // the kernel to potentially modify the input data.
+
+ // Note: don't annotate with TBAA, since the kernel might
+ // have its own TBAA annotations for the pointer argument.
+ Builder.CreateStore(InputLoad, TemporarySlot);
+ Input = TemporarySlot;
+ } else {
+ Input = InputLoad;
+ }
+
+ RootArgs.push_back(Input);
+ }
+ }
+
/* Performs the actual optimization on a selected function. On success, the
* Module will contain a new function of the name "<NAME>.expand" that
* invokes <NAME>() in a loop with the appropriate parameters.
@@ -595,7 +769,7 @@
/*
* Extract the expanded function's parameters. It is guaranteed by
- * createEmptyExpandedFunction that there will be four parameters.
+ * createEmptyExpandedForEachKernel that there will be four parameters.
*/
bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
@@ -725,7 +899,7 @@
/*
* Extract the expanded function's parameters. It is guaranteed by
- * createEmptyExpandedFunction that there will be four parameters.
+ * createEmptyExpandedForEachKernel that there will be four parameters.
*/
bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
@@ -802,7 +976,6 @@
CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
}
- llvm::SmallVector<llvm::Type*, 8> InTypes;
llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
@@ -826,47 +999,8 @@
const size_t NumInPtrArguments = NumRemainingInputs;
if (NumInPtrArguments > 0) {
- // Extract information about input slots and step sizes. The work done
- // here is loop-invariant, so we can hoist the operations out of the loop.
- auto OldInsertionPoint = Builder.saveIP();
- Builder.SetInsertPoint(LoopHeader->getTerminator());
-
- for (size_t InputIndex = 0; InputIndex < NumInPtrArguments; ++InputIndex, ArgIter++) {
- llvm::Type *InType = ArgIter->getType();
-
- /*
- * AArch64 calling conventions dictate that structs of sufficient size
- * get passed by pointer instead of passed by value. This, combined
- * with the fact that we don't allow kernels to operate on pointer
- * data means that if we see a kernel with a pointer parameter we know
- * that it is a struct input that has been promoted. As such we don't
- * need to convert its type to a pointer. Later we will need to know
- * to create a temporary copy on the stack, so we save this information
- * in InStructTempSlots.
- */
- if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) {
- llvm::Type *ElementType = PtrType->getElementType();
- InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr,
- "input_struct_slot"));
- } else {
- InType = InType->getPointerTo();
- InStructTempSlots.push_back(nullptr);
- }
-
- SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr,
- static_cast<int32_t>(InputIndex)}));
- llvm::Value *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep");
- llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf");
- llvm::Value *CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in");
- if (gEnableRsTbaa) {
- InBufPtr->setMetadata("tbaa", TBAAPointer);
- }
-
- InTypes.push_back(InType);
- InBufPtrs.push_back(CastInBufPtr);
- }
-
- Builder.restoreIP(OldInsertionPoint);
+ ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, ArgIter, NumInPtrArguments,
+ InBufPtrs, InStructTempSlots);
}
// Populate the actual call to kernel().
@@ -889,33 +1023,8 @@
// Inputs
if (NumInPtrArguments > 0) {
- llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1);
-
- for (size_t Index = 0; Index < NumInPtrArguments; ++Index) {
- llvm::Value *InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset);
- llvm::Value *Input;
-
- llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
-
- if (gEnableRsTbaa) {
- InputLoad->setMetadata("tbaa", TBAAAllocation);
- }
-
- if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) {
- // Pass a pointer to a temporary on the stack, rather than
- // passing a pointer to the original value. We do not want
- // the kernel to potentially modify the input data.
-
- // Note: don't annotate with TBAA, since the kernel might
- // have its own TBAA annotations for the pointer argument.
- Builder.CreateStore(InputLoad, TemporarySlot);
- Input = TemporarySlot;
- } else {
- Input = InputLoad;
- }
-
- RootArgs.push_back(Input);
- }
+ ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInPtrArguments,
+ InBufPtrs, InStructTempSlots, IV, RootArgs);
}
finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
@@ -933,7 +1042,7 @@
return true;
}
- // Expand a reduce-style kernel function.
+ // Expand a simple reduce-style kernel function.
//
// The input is a kernel which represents a binary operation,
// of the form
@@ -999,7 +1108,7 @@
bool ExpandReduce(llvm::Function *Function) {
bccAssert(Function);
- ALOGV("Expanding reduce kernel %s", Function->getName().str().c_str());
+ ALOGV("Expanding simple reduce kernel %s", Function->getName().str().c_str());
llvm::DataLayout DL(Module);
@@ -1020,7 +1129,7 @@
createEmptyExpandedReduceKernel(Function->getName());
// Extract the expanded kernel's parameters. It is guaranteed by
- // createEmptyExpandedFunction that there will be 3 parameters.
+ // createEmptyExpandedReduceKernel that there will be 3 parameters.
auto ExpandedFunctionArgIter = ExpandedFunction->arg_begin();
llvm::Value *Arg_inBuf = &*(ExpandedFunctionArgIter++);
@@ -1196,6 +1305,118 @@
return true;
}
+ // Certain categories of functions that make up a general
+ // reduce-style kernel are called directly from the driver with no
+ // expansion needed. For a function in such a category, we need to
+ // promote linkage from static to external, to ensure that the
+ // function is visible to the driver in the dynamic symbol table.
+ // This promotion is safe because we don't have any kind of cross
+ // translation unit linkage model (except for linking against RenderScript
+ // libraries), so we do not risk name clashes. Returns true iff linkage was changed.
+ bool PromoteReduceNewFunction(const char *Name, FunctionSet &PromotedFunctions) {
+ if (!Name) // a presumably-optional function that is not present
+ return false;
+
+ llvm::Function *Fn = Module->getFunction(Name);
+ bccAssert(Fn != nullptr);
+ if (PromotedFunctions.insert(Fn).second) { // first time this function is seen
+ bccAssert(Fn->getLinkage() == llvm::GlobalValue::InternalLinkage);
+ Fn->setLinkage(llvm::GlobalValue::ExternalLinkage);
+ return true;
+ }
+ // Already recorded in PromotedFunctions by an earlier call; nothing to change.
+ return false;
+ }
+
+ // Expand the accumulator function for a general reduce-style kernel.
+ //
+ // The input is a function of the form
+ //
+ // define void @func(accumType* %accum, foo1 in1[, ... fooN inN] [, special arguments])
+ //
+ // where all arguments except the first are the same as for a foreach kernel.
+ //
+ // The input accumulator function gets expanded into a function of the form
+ //
+ // define void @func.expand(%RsExpandKernelDriverInfoPfx* %p, i32 %x1, i32 %x2, accumType* %accum)
+ //
+ // which performs a serial accumulation of elements [x1, x2) into *%accum.
+ //
+ // In pseudocode, @func.expand does:
+ //
+ // for (i = %x1; i < %x2; ++i) {
+ // func(%accum,
+ // *((foo1 *)p->inPtr[0] + (i - %x1))[, ... *((fooN *)p->inPtr[N-1] + (i - %x1))]
+ // [, p] [, i] [, p->current.y] [, p->current.z]);
+ // }
+ // Signature and NumInputs describe @func's special arguments and input count; always returns true.
+ // This is very similar to foreach kernel expansion with no output.
+ bool ExpandReduceNewAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) {
+ ALOGV("Expanding accumulator %s for general reduce kernel",
+ FnAccumulator->getName().str().c_str());
+
+ // Create TBAA meta-data.
+ llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
+ *TBAAAllocation, *TBAAPointer;
+ llvm::MDBuilder MDHelper(*Context);
+ TBAARenderScriptDistinct =
+ MDHelper.createTBAARoot(kRenderScriptTBAARootName);
+ TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
+ TBAARenderScriptDistinct);
+ TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
+ TBAARenderScript);
+ TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
+ TBAAAllocation, 0);
+ TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
+ TBAARenderScript);
+ TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
+
+ auto AccumulatorArgIter = FnAccumulator->arg_begin();
+
+ // Create empty accumulator function.
+ llvm::Function *FnExpandedAccumulator =
+ createEmptyExpandedReduceNewAccumulator(FnAccumulator->getName(),
+ (AccumulatorArgIter++)->getType());
+
+ // Extract the expanded accumulator's parameters. It is
+ // guaranteed by createEmptyExpandedReduceNewAccumulator that
+ // there will be 4 parameters.
+ bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceNewAccumulatorParams);
+ auto ExpandedAccumulatorArgIter = FnExpandedAccumulator->arg_begin();
+ llvm::Value *Arg_p = &*(ExpandedAccumulatorArgIter++);
+ llvm::Value *Arg_x1 = &*(ExpandedAccumulatorArgIter++);
+ llvm::Value *Arg_x2 = &*(ExpandedAccumulatorArgIter++);
+ llvm::Value *Arg_accum = &*(ExpandedAccumulatorArgIter++);
+
+ // Construct the actual function body.
+ llvm::IRBuilder<> Builder(FnExpandedAccumulator->getEntryBlock().begin());
+
+ // Create the loop structure.
+ llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
+ llvm::PHINode *IndVar;
+ createLoop(Builder, Arg_x1, Arg_x2, &IndVar);
+
+ llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
+ const int CalleeArgsContextIdx =
+ ExpandSpecialArguments(Signature, IndVar, Arg_p, Builder, CalleeArgs,
+ [](){}, LoopHeader->getTerminator()); // the Bump callback is a no-op here
+
+ llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
+ llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
+ ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, AccumulatorArgIter, NumInputs,
+ InBufPtrs, InStructTempSlots);
+
+ // Populate the actual call to the original accumulator.
+ llvm::SmallVector<llvm::Value*, 8> RootArgs;
+ RootArgs.push_back(Arg_accum); // the accumulator pointer is the callee's first argument
+ ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInputs, InBufPtrs, InStructTempSlots,
+ IndVar, RootArgs);
+ finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *FnAccumulator, Builder);
+ Builder.CreateCall(FnAccumulator, RootArgs);
+
+ return true;
+ }
+
/// @brief Checks if pointers to allocation internals are exposed
///
/// This function verifies if through the parameters passed to the kernel
@@ -1315,7 +1536,7 @@
}
}
- // Expand reduce_* style kernels.
+ // Expand simple reduce_* style kernels.
mExportReduceCount = me.getExportReduceCount();
mExportReduceNameList = me.getExportReduceNameList();
@@ -1326,6 +1547,25 @@
}
}
+ // Process general reduce_* style functions.
+ const size_t ExportReduceNewCount = me.getExportReduceNewCount();
+ const bcinfo::MetadataExtractor::ReduceNew *ExportReduceNewList = me.getExportReduceNewList();
+ // Note that functions can be shared between kernels, so these sets ensure each one is promoted/expanded only once.
+ FunctionSet PromotedFunctions, ExpandedAccumulators;
+
+ for (size_t i = 0; i < ExportReduceNewCount; ++i) {
+ Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mInitializerName, PromotedFunctions);
+ Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mOutConverterName, PromotedFunctions);
+
+ // Accumulator
+ llvm::Function *accumulator = Module.getFunction(ExportReduceNewList[i].mAccumulatorName);
+ bccAssert(accumulator != nullptr);
+ if (ExpandedAccumulators.insert(accumulator).second)
+ Changed |= ExpandReduceNewAccumulator(accumulator,
+ ExportReduceNewList[i].mSignature,
+ ExportReduceNewList[i].mInputCount);
+ }
+
if (gEnableRsTbaa && !allocPointersExposed(Module)) {
connectRenderScriptTBAAMetadata(Module);
}