Handle FP precision in kernel fusion

b/19098612

Find the proper core lib for full-precision and relaxed-precision
FP. Pass both to bcc while compiling a script group, and let bcc
pick the right one based on the precision of the merged module.

Change-Id: I2a641387f0990463887594729a935a5c3f0f856f
diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp
index 6bc98b4..27a27dd 100644
--- a/cpu_ref/rsCpuScriptGroup2.cpp
+++ b/cpu_ref/rsCpuScriptGroup2.cpp
@@ -201,6 +201,32 @@
 
 #ifndef RS_COMPATIBILITY_LIB
 
+string getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
+    *coreLibRelaxedPath = "";
+
+    // If we're debugging, use the debug library.
+    if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
+        return SYSLIBPATH"/libclcore_debug.bc";
+    }
+
+    // Check for a platform specific library
+
+#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
+    // NEON-capable ARMv7a devices can use an accelerated math library
+    // for all reduced precision scripts.
+    // ARMv8 does not use NEON, as ASIMD can be used with all precision
+    // levels.
+    *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc";
+#endif
+
+#if defined(__i386__) || defined(__x86_64__)
+    // x86 devices will use an optimized library.
+    return SYSLIBPATH"/libclcore_x86.bc";
+#else
+    return SYSLIBPATH"/libclcore.bc";
+#endif
+}
+
 string getFileName(string path) {
     unsigned found = path.find_last_of("/\\");
     return path.substr(found + 1);
@@ -210,14 +236,17 @@
         const vector<string>& inputs, const vector<string>& kernelBatches,
         const vector<string>& invokeBatches,
         const string& output_dir, const string& output_filename,
-        const string& rsLib, vector<const char*>* args) {
+        const string& coreLibPath, const string& coreLibRelaxedPath,
+        vector<const char*>* args) {
     args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
     args->push_back("-fPIC");
     args->push_back("-embedRSInfo");
     args->push_back("-mtriple");
     args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
     args->push_back("-bclib");
-    args->push_back(rsLib.c_str());
+    args->push_back(coreLibPath.c_str());
+    args->push_back("-bclib_relaxed");
+    args->push_back(coreLibRelaxedPath.c_str());
     for (const string& input : inputs) {
         args->push_back(input.c_str());
     }
@@ -352,10 +381,12 @@
     TEMP_FAILURE_RETRY(close(tempfd));
 
     string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2));
-    string rsLibPath(SYSLIBPATH"/libclcore.bc");
+    string coreLibRelaxedPath;
+    const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
+                                               &coreLibRelaxedPath);
     vector<const char*> arguments;
     setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
-                          outputFileName, rsLibPath, &arguments);
+                          outputFileName, coreLibPath, coreLibRelaxedPath, &arguments);
     std::unique_ptr<const char> joined(
         rsuJoinStrings(arguments.size() - 1, arguments.data()));
     string commandLine (joined.get());