Separate CPU driver impl from reference driver.

Change-Id: Ifb484edda665959b81d7b1f890d108bfa20a535d
diff --git a/cpu_ref/rsCpuRuntimeStubs.cpp b/cpu_ref/rsCpuRuntimeStubs.cpp
new file mode 100644
index 0000000..b87a639
--- /dev/null
+++ b/cpu_ref/rsCpuRuntimeStubs.cpp
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2011-2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsContext.h"
+#include "rsScriptC.h"
+#include "rsMatrix4x4.h"
+#include "rsMatrix3x3.h"
+#include "rsMatrix2x2.h"
+#include "rsRuntime.h"
+
+#include "utils/Timers.h"
+#include "rsCpuCore.h"
+#include "rsCpuScript.h"
+
+#include <time.h>
+
+using namespace android;
+using namespace android::renderscript;
+
+typedef float float2 __attribute__((ext_vector_type(2)));
+typedef float float3 __attribute__((ext_vector_type(3)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef char char2 __attribute__((ext_vector_type(2)));
+typedef char char3 __attribute__((ext_vector_type(3)));
+typedef char char4 __attribute__((ext_vector_type(4)));
+typedef unsigned char uchar2 __attribute__((ext_vector_type(2)));
+typedef unsigned char uchar3 __attribute__((ext_vector_type(3)));
+typedef unsigned char uchar4 __attribute__((ext_vector_type(4)));
+typedef short short2 __attribute__((ext_vector_type(2)));
+typedef short short3 __attribute__((ext_vector_type(3)));
+typedef short short4 __attribute__((ext_vector_type(4)));
+typedef unsigned short ushort2 __attribute__((ext_vector_type(2)));
+typedef unsigned short ushort3 __attribute__((ext_vector_type(3)));
+typedef unsigned short ushort4 __attribute__((ext_vector_type(4)));
+typedef int32_t int2 __attribute__((ext_vector_type(2)));
+typedef int32_t int3 __attribute__((ext_vector_type(3)));
+typedef int32_t int4 __attribute__((ext_vector_type(4)));
+typedef uint32_t uint2 __attribute__((ext_vector_type(2)));
+typedef uint32_t uint3 __attribute__((ext_vector_type(3)));
+typedef uint32_t uint4 __attribute__((ext_vector_type(4)));
+typedef long long long2 __attribute__((ext_vector_type(2)));
+typedef long long long3 __attribute__((ext_vector_type(3)));
+typedef long long long4 __attribute__((ext_vector_type(4)));
+typedef unsigned long long ulong2 __attribute__((ext_vector_type(2)));
+typedef unsigned long long ulong3 __attribute__((ext_vector_type(3)));
+typedef unsigned long long ulong4 __attribute__((ext_vector_type(4)));
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Message routines
+//////////////////////////////////////////////////////////////////////////////
+
+
+int SC_divsi3(int a, int b) {
+    return a / b;
+}
+
+int SC_modsi3(int a, int b) {
+    return a % b;
+}
+
+unsigned int SC_udivsi3(unsigned int a, unsigned int b) {
+    return a / b;
+}
+
+unsigned int SC_umodsi3(unsigned int a, unsigned int b) {
+    return a % b;
+}
+
+static void SC_debugF(const char *s, float f) {
+    ALOGD("%s %f, 0x%08x", s, f, *((int *) (&f)));
+}
+static void SC_debugFv2(const char *s, float f1, float f2) {
+    ALOGD("%s {%f, %f}", s, f1, f2);
+}
+static void SC_debugFv3(const char *s, float f1, float f2, float f3) {
+    ALOGD("%s {%f, %f, %f}", s, f1, f2, f3);
+}
+static void SC_debugFv4(const char *s, float f1, float f2, float f3, float f4) {
+    ALOGD("%s {%f, %f, %f, %f}", s, f1, f2, f3, f4);
+}
+static void SC_debugF2(const char *s, float2 f) {
+    ALOGD("%s {%f, %f}", s, f.x, f.y);
+}
+static void SC_debugF3(const char *s, float3 f) {
+    ALOGD("%s {%f, %f, %f}", s, f.x, f.y, f.z);
+}
+static void SC_debugF4(const char *s, float4 f) {
+    ALOGD("%s {%f, %f, %f, %f}", s, f.x, f.y, f.z, f.w);
+}
+static void SC_debugD(const char *s, double d) {
+    ALOGD("%s %f, 0x%08llx", s, d, *((long long *) (&d)));
+}
+static void SC_debugFM4v4(const char *s, const float *f) {
+    ALOGD("%s {%f, %f, %f, %f", s, f[0], f[4], f[8], f[12]);
+    ALOGD("%s  %f, %f, %f, %f", s, f[1], f[5], f[9], f[13]);
+    ALOGD("%s  %f, %f, %f, %f", s, f[2], f[6], f[10], f[14]);
+    ALOGD("%s  %f, %f, %f, %f}", s, f[3], f[7], f[11], f[15]);
+}
+static void SC_debugFM3v3(const char *s, const float *f) {
+    ALOGD("%s {%f, %f, %f", s, f[0], f[3], f[6]);
+    ALOGD("%s  %f, %f, %f", s, f[1], f[4], f[7]);
+    ALOGD("%s  %f, %f, %f}",s, f[2], f[5], f[8]);
+}
+static void SC_debugFM2v2(const char *s, const float *f) {
+    ALOGD("%s {%f, %f", s, f[0], f[2]);
+    ALOGD("%s  %f, %f}",s, f[1], f[3]);
+}
+static void SC_debugI8(const char *s, char c) {
+    ALOGD("%s %hhd  0x%hhx", s, c, (unsigned char)c);
+}
+static void SC_debugC2(const char *s, char2 c) {
+    ALOGD("%s {%hhd, %hhd}  0x%hhx 0x%hhx", s, c.x, c.y, (unsigned char)c.x, (unsigned char)c.y);
+}
+static void SC_debugC3(const char *s, char3 c) {
+    ALOGD("%s {%hhd, %hhd, %hhd}  0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, (unsigned char)c.x, (unsigned char)c.y, (unsigned char)c.z);
+}
+static void SC_debugC4(const char *s, char4 c) {
+    ALOGD("%s {%hhd, %hhd, %hhd, %hhd}  0x%hhx 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.w, (unsigned char)c.x, (unsigned char)c.y, (unsigned char)c.z, (unsigned char)c.w);
+}
+static void SC_debugU8(const char *s, unsigned char c) {
+    ALOGD("%s %hhu  0x%hhx", s, c, c);
+}
+static void SC_debugUC2(const char *s, uchar2 c) {
+    ALOGD("%s {%hhu, %hhu}  0x%hhx 0x%hhx", s, c.x, c.y, c.x, c.y);
+}
+static void SC_debugUC3(const char *s, uchar3 c) {
+    ALOGD("%s {%hhu, %hhu, %hhu}  0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.x, c.y, c.z);
+}
+static void SC_debugUC4(const char *s, uchar4 c) {
+    ALOGD("%s {%hhu, %hhu, %hhu, %hhu}  0x%hhx 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.w, c.x, c.y, c.z, c.w);
+}
+static void SC_debugI16(const char *s, short c) {
+    ALOGD("%s %hd  0x%hx", s, c, c);
+}
+static void SC_debugS2(const char *s, short2 c) {
+    ALOGD("%s {%hd, %hd}  0x%hx 0x%hx", s, c.x, c.y, c.x, c.y);
+}
+static void SC_debugS3(const char *s, short3 c) {
+    ALOGD("%s {%hd, %hd, %hd}  0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.x, c.y, c.z);
+}
+static void SC_debugS4(const char *s, short4 c) {
+    ALOGD("%s {%hd, %hd, %hd, %hd}  0x%hx 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.w, c.x, c.y, c.z, c.w);
+}
+static void SC_debugU16(const char *s, unsigned short c) {
+    ALOGD("%s %hu  0x%hx", s, c, c);
+}
+static void SC_debugUS2(const char *s, ushort2 c) {
+    ALOGD("%s {%hu, %hu}  0x%hx 0x%hx", s, c.x, c.y, c.x, c.y);
+}
+static void SC_debugUS3(const char *s, ushort3 c) {
+    ALOGD("%s {%hu, %hu, %hu}  0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.x, c.y, c.z);
+}
+static void SC_debugUS4(const char *s, ushort4 c) {
+    ALOGD("%s {%hu, %hu, %hu, %hu}  0x%hx 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.w, c.x, c.y, c.z, c.w);
+}
+static void SC_debugI32(const char *s, int32_t i) {
+    ALOGD("%s %d  0x%x", s, i, i);
+}
+static void SC_debugI2(const char *s, int2 i) {
+    ALOGD("%s {%d, %d}  0x%x 0x%x", s, i.x, i.y, i.x, i.y);
+}
+static void SC_debugI3(const char *s, int3 i) {
+    ALOGD("%s {%d, %d, %d}  0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.x, i.y, i.z);
+}
+static void SC_debugI4(const char *s, int4 i) {
+    ALOGD("%s {%d, %d, %d, %d}  0x%x 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.w, i.x, i.y, i.z, i.w);
+}
+static void SC_debugU32(const char *s, uint32_t i) {
+    ALOGD("%s %u  0x%x", s, i, i);
+}
+static void SC_debugUI2(const char *s, uint2 i) {
+    ALOGD("%s {%u, %u}  0x%x 0x%x", s, i.x, i.y, i.x, i.y);
+}
+static void SC_debugUI3(const char *s, uint3 i) {
+    ALOGD("%s {%u, %u, %u}  0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.x, i.y, i.z);
+}
+static void SC_debugUI4(const char *s, uint4 i) {
+    ALOGD("%s {%u, %u, %u, %u}  0x%x 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.w, i.x, i.y, i.z, i.w);
+}
+static void SC_debugLL64(const char *s, long long ll) {
+    ALOGD("%s %lld  0x%llx", s, ll, ll);
+}
+static void SC_debugL2(const char *s, long2 ll) {
+    ALOGD("%s {%lld, %lld}  0x%llx 0x%llx", s, ll.x, ll.y, ll.x, ll.y);
+}
+static void SC_debugL3(const char *s, long3 ll) {
+    ALOGD("%s {%lld, %lld, %lld}  0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.x, ll.y, ll.z);
+}
+static void SC_debugL4(const char *s, long4 ll) {
+    ALOGD("%s {%lld, %lld, %lld, %lld}  0x%llx 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.w, ll.x, ll.y, ll.z, ll.w);
+}
+static void SC_debugULL64(const char *s, unsigned long long ll) {
+    ALOGD("%s %llu  0x%llx", s, ll, ll);
+}
+static void SC_debugUL2(const char *s, ulong2 ll) {
+    ALOGD("%s {%llu, %llu}  0x%llx 0x%llx", s, ll.x, ll.y, ll.x, ll.y);
+}
+static void SC_debugUL3(const char *s, ulong3 ll) {
+    ALOGD("%s {%llu, %llu, %llu}  0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.x, ll.y, ll.z);
+}
+static void SC_debugUL4(const char *s, ulong4 ll) {
+    ALOGD("%s {%llu, %llu, %llu, %llu}  0x%llx 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.w, ll.x, ll.y, ll.z, ll.w);
+}
+static void SC_debugP(const char *s, const void *p) {
+    ALOGD("%s %p", s, p);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Stub implementation
+//////////////////////////////////////////////////////////////////////////////
+
+// llvm name mangling ref
+//  <builtin-type> ::= v  # void
+//                 ::= b  # bool
+//                 ::= c  # char
+//                 ::= a  # signed char
+//                 ::= h  # unsigned char
+//                 ::= s  # short
+//                 ::= t  # unsigned short
+//                 ::= i  # int
+//                 ::= j  # unsigned int
+//                 ::= l  # long
+//                 ::= m  # unsigned long
+//                 ::= x  # long long, __int64
+//                 ::= y  # unsigned long long, __int64
+//                 ::= f  # float
+//                 ::= d  # double
+
+static RsdCpuReference::CpuSymbol gSyms[] = {
+    { "memset", (void *)&memset, true },
+    { "memcpy", (void *)&memcpy, true },
+
+    // Debug
+    { "_Z7rsDebugPKcf", (void *)&SC_debugF, true },
+    { "_Z7rsDebugPKcff", (void *)&SC_debugFv2, true },
+    { "_Z7rsDebugPKcfff", (void *)&SC_debugFv3, true },
+    { "_Z7rsDebugPKcffff", (void *)&SC_debugFv4, true },
+    { "_Z7rsDebugPKcDv2_f", (void *)&SC_debugF2, true },
+    { "_Z7rsDebugPKcDv3_f", (void *)&SC_debugF3, true },
+    { "_Z7rsDebugPKcDv4_f", (void *)&SC_debugF4, true },
+    { "_Z7rsDebugPKcd", (void *)&SC_debugD, true },
+    { "_Z7rsDebugPKcPK12rs_matrix4x4", (void *)&SC_debugFM4v4, true },
+    { "_Z7rsDebugPKcPK12rs_matrix3x3", (void *)&SC_debugFM3v3, true },
+    { "_Z7rsDebugPKcPK12rs_matrix2x2", (void *)&SC_debugFM2v2, true },
+    { "_Z7rsDebugPKcc", (void *)&SC_debugI8, true },
+    { "_Z7rsDebugPKcDv2_c", (void *)&SC_debugC2, true },
+    { "_Z7rsDebugPKcDv3_c", (void *)&SC_debugC3, true },
+    { "_Z7rsDebugPKcDv4_c", (void *)&SC_debugC4, true },
+    { "_Z7rsDebugPKch", (void *)&SC_debugU8, true },
+    { "_Z7rsDebugPKcDv2_h", (void *)&SC_debugUC2, true },
+    { "_Z7rsDebugPKcDv3_h", (void *)&SC_debugUC3, true },
+    { "_Z7rsDebugPKcDv4_h", (void *)&SC_debugUC4, true },
+    { "_Z7rsDebugPKcs", (void *)&SC_debugI16, true },
+    { "_Z7rsDebugPKcDv2_s", (void *)&SC_debugS2, true },
+    { "_Z7rsDebugPKcDv3_s", (void *)&SC_debugS3, true },
+    { "_Z7rsDebugPKcDv4_s", (void *)&SC_debugS4, true },
+    { "_Z7rsDebugPKct", (void *)&SC_debugU16, true },
+    { "_Z7rsDebugPKcDv2_t", (void *)&SC_debugUS2, true },
+    { "_Z7rsDebugPKcDv3_t", (void *)&SC_debugUS3, true },
+    { "_Z7rsDebugPKcDv4_t", (void *)&SC_debugUS4, true },
+    { "_Z7rsDebugPKci", (void *)&SC_debugI32, true },
+    { "_Z7rsDebugPKcDv2_i", (void *)&SC_debugI2, true },
+    { "_Z7rsDebugPKcDv3_i", (void *)&SC_debugI3, true },
+    { "_Z7rsDebugPKcDv4_i", (void *)&SC_debugI4, true },
+    { "_Z7rsDebugPKcj", (void *)&SC_debugU32, true },
+    { "_Z7rsDebugPKcDv2_j", (void *)&SC_debugUI2, true },
+    { "_Z7rsDebugPKcDv3_j", (void *)&SC_debugUI3, true },
+    { "_Z7rsDebugPKcDv4_j", (void *)&SC_debugUI4, true },
+    // Both "long" and "unsigned long" need to be redirected to their
+    // 64-bit counterparts, since we have hacked Slang to use 64-bit
+    // for "long" on Arm (to be similar to Java).
+    { "_Z7rsDebugPKcl", (void *)&SC_debugLL64, true },
+    { "_Z7rsDebugPKcDv2_l", (void *)&SC_debugL2, true },
+    { "_Z7rsDebugPKcDv3_l", (void *)&SC_debugL3, true },
+    { "_Z7rsDebugPKcDv4_l", (void *)&SC_debugL4, true },
+    { "_Z7rsDebugPKcm", (void *)&SC_debugULL64, true },
+    { "_Z7rsDebugPKcDv2_m", (void *)&SC_debugUL2, true },
+    { "_Z7rsDebugPKcDv3_m", (void *)&SC_debugUL3, true },
+    { "_Z7rsDebugPKcDv4_m", (void *)&SC_debugUL4, true },
+    { "_Z7rsDebugPKcx", (void *)&SC_debugLL64, true },
+    { "_Z7rsDebugPKcDv2_x", (void *)&SC_debugL2, true },
+    { "_Z7rsDebugPKcDv3_x", (void *)&SC_debugL3, true },
+    { "_Z7rsDebugPKcDv4_x", (void *)&SC_debugL4, true },
+    { "_Z7rsDebugPKcy", (void *)&SC_debugULL64, true },
+    { "_Z7rsDebugPKcDv2_y", (void *)&SC_debugUL2, true },
+    { "_Z7rsDebugPKcDv3_y", (void *)&SC_debugUL3, true },
+    { "_Z7rsDebugPKcDv4_y", (void *)&SC_debugUL4, true },
+    { "_Z7rsDebugPKcPKv", (void *)&SC_debugP, true },
+
+    { NULL, NULL, false }
+};
+
+
+void * RsdCpuScriptImpl::lookupRuntimeStub(void* pContext, char const* name) {
+    RsdCpuScriptImpl *s = (RsdCpuScriptImpl *)pContext;
+    const RsdCpuReference::CpuSymbol *syms = gSyms;
+    const RsdCpuReference::CpuSymbol *sym = NULL;
+
+    sym = s->mCtx->symLookup(name);
+    if (!sym) {
+        sym = s->lookupSymbolMath(name);
+    }
+    if (!sym) {
+        while (syms->fnPtr) {
+            if (!strcmp(syms->name, name)) {
+                sym = syms;
+            }
+            syms++;
+        }
+    }
+
+    if (sym) {
+        s->mIsThreadable &= sym->threadable;
+        return sym->fnPtr;
+    }
+    ALOGE("ScriptC sym lookup failed for %s", name);
+    return NULL;
+}
+
+