Merge "Fix the MD debug info to the `expand' kernels" am: 4a82aa5
am: 8c57a75
* commit '8c57a75aad2a60b3aaf875e372c3a3f7ed82d3ef':
Fix the MD debug info to the `expand' kernels
Change-Id: I32bfb70fc1f0c231f95b7db060ebe3bc889f2274
diff --git a/lib/Core/Compiler.cpp b/lib/Core/Compiler.cpp
index fe16b71..4062294 100644
--- a/lib/Core/Compiler.cpp
+++ b/lib/Core/Compiler.cpp
@@ -358,12 +358,12 @@
}
// Expanded foreach and reduce functions should not be internalized;
- // nor should general reduction initializer and outconverter
- // functions. keep_funcs keeps the names of these functions around
- // until createInternalizePass() is finished making its own copy of
- // the visible symbols.
+ // nor should general reduction initializer, combiner, and
+ // outconverter functions. keep_funcs keeps the names of these
+ // functions around until createInternalizePass() is finished making
+ // its own copy of the visible symbols.
std::vector<std::string> keep_funcs;
- keep_funcs.reserve(exportForEachCount + exportReduceCount + exportReduceNewCount*3);
+ keep_funcs.reserve(exportForEachCount + exportReduceCount + exportReduceNewCount*4);
for (i = 0; i < exportForEachCount; ++i) {
keep_funcs.push_back(std::string(exportForEachNameList[i]) + ".expand");
@@ -376,8 +376,8 @@
};
for (i = 0; i < exportReduceNewCount; ++i) {
keep_funcs.push_back(std::string(exportReduceNewList[i].mAccumulatorName) + ".expand");
- // Note: driver does not currently use the combiner function
keepFuncsPushBackIfPresent(exportReduceNewList[i].mInitializerName);
+ keepFuncsPushBackIfPresent(exportReduceNewList[i].mCombinerName);
keepFuncsPushBackIfPresent(exportReduceNewList[i].mOutConverterName);
}
diff --git a/lib/Renderscript/RSKernelExpand.cpp b/lib/Renderscript/RSKernelExpand.cpp
index 7337a30..d3353ad 100644
--- a/lib/Renderscript/RSKernelExpand.cpp
+++ b/lib/Renderscript/RSKernelExpand.cpp
@@ -1585,6 +1585,7 @@
for (size_t i = 0; i < ExportReduceNewCount; ++i) {
Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mInitializerName, PromotedFunctions);
+ Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mCombinerName, PromotedFunctions);
Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mOutConverterName, PromotedFunctions);
// Accumulator
diff --git a/lib/Support/CompilerConfig.cpp b/lib/Support/CompilerConfig.cpp
index 68d943a..b2b3a55 100644
--- a/lib/Support/CompilerConfig.cpp
+++ b/lib/Support/CompilerConfig.cpp
@@ -145,6 +145,14 @@
if (features.count("fp16") && features["fp16"])
attributes.push_back("+fp16");
+#if defined(PROVIDE_ARM64_CODEGEN)
+ // On AArch64, asimd in /proc/cpuinfo signals the presence of hardware
+ // half-precision conversion instructions. getHostCPUFeatures translates
+ // this to "neon". If PROVIDE_ARM64_CODEGEN is set, enable "+fp16" for ARM
+ // codegen if "neon" is present in features.
+ if (features.count("neon") && features["neon"])
+ attributes.push_back("+fp16");
+#endif // PROVIDE_ARM64_CODEGEN
#if defined(TARGET_BUILD)
if (!getProperty("debug.rs.arm-no-tune-for-cpu")) {