Merge "If a general reduction kernel lacks a combiner function, synthesize one."
diff --git a/include/bcc/Renderscript/RSUtils.h b/include/bcc/Renderscript/RSUtils.h
index 19b45f6..fbd5ed0 100644
--- a/include/bcc/Renderscript/RSUtils.h
+++ b/include/bcc/Renderscript/RSUtils.h
@@ -21,6 +21,9 @@
#include <llvm/IR/Type.h>
#include <llvm/IR/DerivedTypes.h>
+#include <llvm/ADT/StringRef.h>
+
+#include <string>
namespace {
@@ -102,4 +105,12 @@
} // end namespace
+// When we have a general reduction kernel with no combiner function,
+// we will synthesize a combiner function from the accumulator
+// function. Given the accumulator function name, what should be the
+// name of the combiner function?
+static inline std::string nameReduceNewCombinerFromAccumulator(llvm::StringRef accumName) {
+  // Start from an owned copy of the accumulator's name, then tack on the
+  // fixed suffix that marks the synthesized combiner.
+  std::string combinerName = accumName.str();
+  combinerName += ".combiner";
+  return combinerName;
+}
+
#endif // BCC_RS_UTILS_H
diff --git a/lib/Core/Compiler.cpp b/lib/Core/Compiler.cpp
index 7b291f0..1988da3 100644
--- a/lib/Core/Compiler.cpp
+++ b/lib/Core/Compiler.cpp
@@ -35,6 +35,7 @@
#include "bcc/Config/Config.h"
#include "bcc/Renderscript/RSScript.h"
#include "bcc/Renderscript/RSTransforms.h"
+#include "bcc/Renderscript/RSUtils.h"
#include "bcc/Script.h"
#include "bcc/Source.h"
#include "bcc/Support/CompilerConfig.h"
@@ -389,7 +390,11 @@
for (i = 0; i < exportReduceNewCount; ++i) {
keep_funcs.push_back(std::string(exportReduceNewList[i].mAccumulatorName) + ".expand");
keepFuncsPushBackIfPresent(exportReduceNewList[i].mInitializerName);
- keepFuncsPushBackIfPresent(exportReduceNewList[i].mCombinerName);
+ if (exportReduceNewList[i].mCombinerName != nullptr) {
+ keep_funcs.push_back(exportReduceNewList[i].mCombinerName);
+ } else {
+ keep_funcs.push_back(nameReduceNewCombinerFromAccumulator(exportReduceNewList[i].mAccumulatorName));
+ }
keepFuncsPushBackIfPresent(exportReduceNewList[i].mOutConverterName);
}
diff --git a/lib/Renderscript/RSEmbedInfo.cpp b/lib/Renderscript/RSEmbedInfo.cpp
index 54e0acb..2d2e69f 100644
--- a/lib/Renderscript/RSEmbedInfo.cpp
+++ b/lib/Renderscript/RSEmbedInfo.cpp
@@ -17,6 +17,7 @@
#include "bcc/Assert.h"
#include "bcc/Config/Config.h"
#include "bcc/Renderscript/RSTransforms.h"
+#include "bcc/Renderscript/RSUtils.h"
#include "bcc/Support/Log.h"
#include "bcinfo/MetadataExtractor.h"
#include "rsDefines.h"
@@ -139,7 +140,9 @@
<< reduceNew.mReduceName << " - "
<< reduceNewFnName(reduceNew.mInitializerName) << " - "
<< reduceNewFnName(reduceNew.mAccumulatorName) << " - "
- << reduceNewFnName(reduceNew.mCombinerName) << " - "
+ << ((reduceNew.mCombinerName != nullptr)
+ ? reduceNew.mCombinerName
+ : nameReduceNewCombinerFromAccumulator(reduceNew.mAccumulatorName)) << " - "
<< reduceNewFnName(reduceNew.mOutConverterName) << " - "
<< reduceNewFnName(reduceNew.mHalterName)
<< "\n";
diff --git a/lib/Renderscript/RSKernelExpand.cpp b/lib/Renderscript/RSKernelExpand.cpp
index 674e51f..893b186 100644
--- a/lib/Renderscript/RSKernelExpand.cpp
+++ b/lib/Renderscript/RSKernelExpand.cpp
@@ -16,6 +16,7 @@
#include "bcc/Assert.h"
#include "bcc/Renderscript/RSTransforms.h"
+#include "bcc/Renderscript/RSUtils.h"
#include <cstdlib>
#include <functional>
@@ -1513,6 +1514,80 @@
return true;
}
+ // Create a combiner function for a general reduce-style kernel that lacks one,
+ // by calling the accumulator function.
+ //
+ // The accumulator function must be of the form
+ //
+ // define void @accumFn(accumType* %accum, accumType %in)
+ //
+ // A combiner function will be generated of the form
+ //
+ // define void @accumFn.combiner(accumType* %accum, accumType* %other) {
+ //   %1 = load accumType, accumType* %other
+ //   call void @accumFn(accumType* %accum, accumType %1)
+ //   ret void
+ // }
+ //
+ // FnAccumulator is the accumulator to wrap. It is asserted to take exactly
+ // two arguments, the first of which must have pointer type.
+ //
+ // The new function is added to Module with external linkage, under the name
+ // produced by nameReduceNewCombinerFromAccumulator(). Always returns true.
+ bool CreateReduceNewCombinerFromAccumulator(llvm::Function *FnAccumulator) {
+ ALOGV("Creating combiner from accumulator %s for general reduce kernel",
+ FnAccumulator->getName().str().c_str());
+
+ using llvm::Attribute;
+
+ // Pick apart the accumulator's signature: (accum pointer, input).
+ bccAssert(FnAccumulator->arg_size() == 2);
+ auto AccumulatorArgIter = FnAccumulator->arg_begin();
+ llvm::Value *AccumulatorArg_accum = &*(AccumulatorArgIter++);
+ llvm::Value *AccumulatorArg_in = &*(AccumulatorArgIter++);
+ llvm::Type *AccumulatorArgType = AccumulatorArg_accum->getType();
+ bccAssert(AccumulatorArgType->isPointerTy());
+
+ // The combiner is void(accumPtr, accumPtr), non-variadic: both of its
+ // parameters reuse the type of the accumulator's first parameter.
+ llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
+ llvm::FunctionType *CombinerType =
+ llvm::FunctionType::get(VoidTy, { AccumulatorArgType, AccumulatorArgType }, false);
+ llvm::Function *FnCombiner =
+ llvm::Function::Create(CombinerType, llvm::GlobalValue::ExternalLinkage,
+ nameReduceNewCombinerFromAccumulator(FnAccumulator->getName()),
+ Module);
+
+ auto CombinerArgIter = FnCombiner->arg_begin();
+
+ // Name each combiner parameter and mark it nocapture. The "+ 1" on the
+ // argument number presumably accounts for AttributeSet slot 0 being the
+ // return value -- NOTE(review): confirm against the LLVM version in use.
+ llvm::Argument *CombinerArg_accum = &(*CombinerArgIter++);
+ CombinerArg_accum->setName("accum");
+ CombinerArg_accum->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_accum->getArgNo() + 1,
+ llvm::makeArrayRef(Attribute::NoCapture)));
+
+ llvm::Argument *CombinerArg_other = &(*CombinerArgIter++);
+ CombinerArg_other->setName("other");
+ CombinerArg_other->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_other->getArgNo() + 1,
+ llvm::makeArrayRef(Attribute::NoCapture)));
+
+ // The whole combiner body lives in a single basic block.
+ llvm::BasicBlock *BB = llvm::BasicBlock::Create(*Context, "BB", FnCombiner);
+ llvm::IRBuilder<> Builder(BB);
+
+ if (AccumulatorArg_in->getType()->isPointerTy()) {
+ // Types of sufficient size get passed by pointer-to-copy rather
+ // than passed by value. An accumulator cannot take a pointer
+ // at the user level; so if we see a pointer here, we know that
+ // we have a pass-by-pointer-to-copy case.
+ //
+ // Copy *other into a fresh stack slot and hand the accumulator a
+ // pointer to that copy instead of a pointer to "other" itself.
+ llvm::Type *ElementType = AccumulatorArg_in->getType()->getPointerElementType();
+ llvm::Value *TempMem = Builder.CreateAlloca(ElementType, nullptr, "caller_copy");
+ Builder.CreateStore(Builder.CreateLoad(CombinerArg_other), TempMem);
+ Builder.CreateCall(FnAccumulator, { CombinerArg_accum, TempMem });
+ } else {
+ // By-value input: load *other and pass the loaded value directly.
+ llvm::Value *TypeAdjustedOther = CombinerArg_other;
+ if (AccumulatorArgType->getPointerElementType() != AccumulatorArg_in->getType()) {
+ // Call lowering by frontend has done some type coercion
+ // Reinterpret "other" as a pointer to the accumulator's input type so
+ // that the load below yields a value of the type the call expects.
+ TypeAdjustedOther = Builder.CreatePointerCast(CombinerArg_other,
+ AccumulatorArg_in->getType()->getPointerTo(),
+ "cast");
+ }
+ llvm::Value *DerefOther = Builder.CreateLoad(TypeAdjustedOther);
+ Builder.CreateCall(FnAccumulator, { CombinerArg_accum, DerefOther });
+ }
+ Builder.CreateRetVoid();
+
+ // A function was unconditionally added to the module.
+ return true;
+ }
+
/// @brief Checks if pointers to allocation internals are exposed
///
/// This function verifies if through the parameters passed to the kernel
@@ -1647,7 +1722,7 @@
const size_t ExportReduceNewCount = me.getExportReduceNewCount();
const bcinfo::MetadataExtractor::ReduceNew *ExportReduceNewList = me.getExportReduceNewList();
// Note that functions can be shared between kernels
- FunctionSet PromotedFunctions, ExpandedAccumulators;
+ FunctionSet PromotedFunctions, ExpandedAccumulators, AccumulatorsForCombiners;
for (size_t i = 0; i < ExportReduceNewCount; ++i) {
Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mInitializerName, PromotedFunctions);
@@ -1661,6 +1736,10 @@
Changed |= ExpandReduceNewAccumulator(accumulator,
ExportReduceNewList[i].mSignature,
ExportReduceNewList[i].mInputCount);
+ if (!ExportReduceNewList[i].mCombinerName) {
+ if (AccumulatorsForCombiners.insert(accumulator).second)
+ Changed |= CreateReduceNewCombinerFromAccumulator(accumulator);
+ }
}
if (gEnableRsTbaa && !allocPointersExposed(Module)) {