Add x86 server support.
Change-Id: I674acaf15b67afa48bc736f72942a11e2e38e940
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index c78b238..de71112 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -24,10 +24,20 @@
#include <sys/types.h>
#include <sys/resource.h>
#include <sched.h>
-#include <cutils/properties.h>
#include <sys/syscall.h>
#include <string.h>
+
+#ifndef RS_SERVER
+#include <cutils/properties.h>
#include "utils/StopWatch.h"
+#endif
+
+#ifdef RS_SERVER
+// RS_SERVER-only shim: Android exposes gettid(), standard Linux does not, so return the thread id via the raw SYS_gettid syscall. NOTE(review): newer glibc releases appear to declare gettid() themselves, which may clash with this static definition -- confirm against the target toolchain.
+static pid_t gettid() {
+ return syscall(SYS_gettid);
+}
+#endif
using namespace android;
using namespace android::renderscript;
@@ -102,8 +112,7 @@
void * RsdCpuReferenceImpl::helperThreadProc(void *vrsc) {
RsdCpuReferenceImpl *dc = (RsdCpuReferenceImpl *)vrsc;
-
- uint32_t idx = (uint32_t)android_atomic_inc(&dc->mWorkers.mLaunchCount);
+ uint32_t idx = __sync_fetch_and_add(&dc->mWorkers.mLaunchCount, 1);
//ALOGV("RS helperThread starting %p idx=%i", dc, idx);
@@ -132,7 +141,7 @@
// idx +1 is used because the calling thread is always worker 0.
dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx+1);
}
- android_atomic_dec(&dc->mWorkers.mRunningCount);
+ __sync_fetch_and_sub(&dc->mWorkers.mRunningCount, 1);
dc->mWorkers.mCompleteSignal.set();
}
@@ -153,7 +162,9 @@
return;
}
- android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
+ mWorkers.mRunningCount = mWorkers.mCount;
+ __sync_synchronize();
+
for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
mWorkers.mLaunchSignals[ct].set();
}
@@ -164,7 +175,7 @@
mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
}
- while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) {
+ while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) {
mWorkers.mCompleteSignal.wait();
}
}
@@ -224,8 +235,9 @@
mWorkers.mCompleteSignal.init();
- android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
- android_atomic_release_store(0, &mWorkers.mLaunchCount);
+ mWorkers.mRunningCount = mWorkers.mCount;
+ mWorkers.mLaunchCount = 0;
+ __sync_synchronize();
pthread_attr_t threadAttr;
status = pthread_attr_init(&threadAttr);
@@ -242,7 +254,7 @@
break;
}
}
- while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) {
+ while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) {
usleep(100);
}
@@ -261,7 +273,8 @@
mExit = true;
mWorkers.mLaunchData = NULL;
mWorkers.mLaunchCallback = NULL;
- android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
+ mWorkers.mRunningCount = mWorkers.mCount;
+ __sync_synchronize();
for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
mWorkers.mLaunchSignals[ct].set();
}
@@ -269,7 +282,7 @@
for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
pthread_join(mWorkers.mThreadId[ct], &res);
}
- rsAssert(android_atomic_acquire_load(&mWorkers.mRunningCount) == 0);
+ rsAssert(__sync_fetch_and_or(&mWorkers.mRunningCount, 0) == 0);
// Global structure cleanup.
lockMutex();
@@ -292,7 +305,7 @@
outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
while (1) {
- uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
+ uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
uint32_t yEnd = yStart + mtls->mSliceSize;
yEnd = rsMin(yEnd, mtls->yEnd);
@@ -322,7 +335,7 @@
outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
while (1) {
- uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
+ uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
uint32_t xEnd = xStart + mtls->mSliceSize;
xEnd = rsMin(xEnd, mtls->xEnd);
diff --git a/cpu_ref/rsCpuIntrinsic3DLUT.cpp b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
index 2eca373..03f24d8 100644
--- a/cpu_ref/rsCpuIntrinsic3DLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
@@ -135,7 +135,7 @@
uint4 v2 = (v + 0x7f) >> (int4)8;
uchar4 ret = convert_uchar4(v2);
- ret.a = in->a;
+ ret.w = in->w;
#if 0
if (!x1) {
diff --git a/cpu_ref/rsCpuIntrinsicBlend.cpp b/cpu_ref/rsCpuIntrinsicBlend.cpp
index d7b01b6..4e9470e 100644
--- a/cpu_ref/rsCpuIntrinsicBlend.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlend.cpp
@@ -143,7 +143,7 @@
for (;x1 < x2; x1++, out++, in++) {
short4 in_s = convert_short4(*in);
short4 out_s = convert_short4(*out);
- in_s = in_s + ((out_s * (short4)(255 - in_s.a)) >> (short4)8);
+ in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
*out = convert_uchar4(in_s);
}
break;
@@ -160,7 +160,7 @@
for (;x1 < x2; x1++, out++, in++) {
short4 in_s = convert_short4(*in);
short4 out_s = convert_short4(*out);
- in_s = out_s + ((in_s * (short4)(255 - out_s.a)) >> (short4)8);
+ in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
*out = convert_uchar4(in_s);
}
break;
@@ -176,7 +176,7 @@
#endif
for (;x1 < x2; x1++, out++, in++) {
short4 in_s = convert_short4(*in);
- in_s = (in_s * out->a) >> (short4)8;
+ in_s = (in_s * out->w) >> (short4)8;
*out = convert_uchar4(in_s);
}
break;
@@ -192,7 +192,7 @@
#endif
for (;x1 < x2; x1++, out++, in++) {
short4 out_s = convert_short4(*out);
- out_s = (out_s * in->a) >> (short4)8;
+ out_s = (out_s * in->w) >> (short4)8;
*out = convert_uchar4(out_s);
}
break;
@@ -208,7 +208,7 @@
#endif
for (;x1 < x2; x1++, out++, in++) {
short4 in_s = convert_short4(*in);
- in_s = (in_s * (short4)(255 - out->a)) >> (short4)8;
+ in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
*out = convert_uchar4(in_s);
}
break;
@@ -224,7 +224,7 @@
#endif
for (;x1 < x2; x1++, out++, in++) {
short4 out_s = convert_short4(*out);
- out_s = (out_s * (short4)(255 - in->a)) >> (short4)8;
+ out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
*out = convert_uchar4(out_s);
}
break;
@@ -241,8 +241,8 @@
for (;x1 < x2; x1++, out++, in++) {
short4 in_s = convert_short4(*in);
short4 out_s = convert_short4(*out);
- out_s.rgb = ((in_s.rgb * out_s.a) +
- (out_s.rgb * ((short3)255 - (short3)in_s.a))) >> (short3)8;
+ out_s.xyz = ((in_s.xyz * out_s.w) +
+ (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
*out = convert_uchar4(out_s);
}
break;
@@ -259,8 +259,8 @@
for (;x1 < x2; x1++, out++, in++) {
short4 in_s = convert_short4(*in);
short4 out_s = convert_short4(*out);
- out_s.rgb = ((out_s.rgb * in_s.a) +
- (in_s.rgb * ((short3)255 - (short3)out_s.a))) >> (short3)8;
+ out_s.xyz = ((out_s.xyz * in_s.w) +
+ (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
*out = convert_uchar4(out_s);
}
break;
@@ -388,12 +388,12 @@
}
#endif
for (;x1 < x2; x1++, out++, in++) {
- uint32_t iR = in->r, iG = in->g, iB = in->b, iA = in->a,
- oR = out->r, oG = out->g, oB = out->b, oA = out->a;
- out->r = (oR + iR) > 255 ? 255 : oR + iR;
- out->g = (oG + iG) > 255 ? 255 : oG + iG;
- out->b = (oB + iB) > 255 ? 255 : oB + iB;
- out->a = (oA + iA) > 255 ? 255 : oA + iA;
+ uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
+ oR = out->x, oG = out->y, oB = out->z, oA = out->w;
+ out->x = (oR + iR) > 255 ? 255 : oR + iR;
+ out->y = (oG + iG) > 255 ? 255 : oG + iG;
+ out->z = (oB + iB) > 255 ? 255 : oB + iB;
+ out->w = (oA + iA) > 255 ? 255 : oA + iA;
}
break;
case BLEND_SUBTRACT:
@@ -407,12 +407,12 @@
}
#endif
for (;x1 < x2; x1++, out++, in++) {
- int32_t iR = in->r, iG = in->g, iB = in->b, iA = in->a,
- oR = out->r, oG = out->g, oB = out->b, oA = out->a;
- out->r = (oR - iR) < 0 ? 0 : oR - iR;
- out->g = (oG - iG) < 0 ? 0 : oG - iG;
- out->b = (oB - iB) < 0 ? 0 : oB - iB;
- out->a = (oA - iA) < 0 ? 0 : oA - iA;
+ int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
+ oR = out->x, oG = out->y, oB = out->z, oA = out->w;
+ out->x = (oR - iR) < 0 ? 0 : oR - iR;
+ out->y = (oG - iG) < 0 ? 0 : oG - iG;
+ out->z = (oB - iB) < 0 ? 0 : oB - iB;
+ out->w = (oA - iA) < 0 ? 0 : oA - iA;
}
break;
case BLEND_STAMP:
diff --git a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
index bcd5ffd..112f377 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
@@ -113,7 +113,7 @@
//ALOGE("x %i %i,%i,%i,%i %i,%i,%i,%i", x, o.x, o.y, o.z, o.w, out[0].x, out[0].y, out[0].z, out[0].w);
//}
//o.w = 0xff;
- out->rgba = o.rgba;
+ out->xyzw = o.xyzw;
}
extern "C" void rsdIntrinsicConvolve5x5_K(void *dst, const void *y0, const void *y1,
diff --git a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
index bb8cde1..3a49c0d 100644
--- a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
+++ b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
@@ -61,30 +61,30 @@
short V = ((short)v) - 128;
short4 p;
- p.r = (Y * 298 + V * 409 + 128) >> 8;
- p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
- p.b = (Y * 298 + U * 516 + 128) >> 8;
- p.a = 255;
- if(p.r < 0) {
- p.r = 0;
+ p.x = (Y * 298 + V * 409 + 128) >> 8;
+ p.y = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
+ p.z = (Y * 298 + U * 516 + 128) >> 8;
+ p.w = 255;
+ if(p.x < 0) {
+ p.x = 0;
}
- if(p.r > 255) {
- p.r = 255;
+ if(p.x > 255) {
+ p.x = 255;
}
- if(p.g < 0) {
- p.g = 0;
+ if(p.y < 0) {
+ p.y = 0;
}
- if(p.g > 255) {
- p.g = 255;
+ if(p.y > 255) {
+ p.y = 255;
}
- if(p.b < 0) {
- p.b = 0;
+ if(p.z < 0) {
+ p.z = 0;
}
- if(p.b > 255) {
- p.b = 255;
+ if(p.z > 255) {
+ p.z = 255;
}
- return (uchar4){p.r, p.g, p.b, p.a};
+ return (uchar4){p.x, p.y, p.z, p.w};
}
diff --git a/cpu_ref/rsCpuRuntimeMath.cpp b/cpu_ref/rsCpuRuntimeMath.cpp
index f66677b..6c02303 100644
--- a/cpu_ref/rsCpuRuntimeMath.cpp
+++ b/cpu_ref/rsCpuRuntimeMath.cpp
@@ -14,7 +14,9 @@
* limitations under the License.
*/
+#ifndef RS_SERVER
#include <cutils/compiler.h>
+#endif
#include "rsContext.h"
#include "rsScriptC.h"
diff --git a/cpu_ref/rsCpuRuntimeStubs.cpp b/cpu_ref/rsCpuRuntimeStubs.cpp
index ceea9c4..7b8d557 100644
--- a/cpu_ref/rsCpuRuntimeStubs.cpp
+++ b/cpu_ref/rsCpuRuntimeStubs.cpp
@@ -21,12 +21,15 @@
#include "rsMatrix2x2.h"
#include "rsRuntime.h"
-#include "utils/Timers.h"
#include "rsCpuCore.h"
#include "rsCpuScript.h"
#include <time.h>
+#ifndef RS_SERVER
+#include "utils/Timers.h"
+#endif
+
using namespace android;
using namespace android::renderscript;
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index 2ae4d83..7887474 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -23,10 +23,11 @@
//#include "rsdAllocation.h"
//#include "rsCpuIntrinsics.h"
-
+#ifndef RS_SERVER
#include "utils/Vector.h"
#include "utils/Timers.h"
#include "utils/StopWatch.h"
+#endif
#ifdef RS_COMPATIBILITY_LIB
#include <dlfcn.h>
@@ -177,10 +178,14 @@
#else
+#ifndef RS_SERVER
String8 scriptSOName(cacheDir);
scriptSOName = scriptSOName.getPathDir();
scriptSOName.appendPath("lib");
scriptSOName.append("/librs.");
+#else
+ String8 scriptSOName("lib");
+#endif
scriptSOName.append(resName);
scriptSOName.append(".so");
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp
index d418c22..b3b5bf9 100644
--- a/cpu_ref/rsCpuScriptGroup.cpp
+++ b/cpu_ref/rsCpuScriptGroup.cpp
@@ -18,10 +18,12 @@
#include "rsCpuScript.h"
#include "rsCpuScriptGroup.h"
+#ifndef RS_SERVER
#include <bcc/BCCContext.h>
#include <bcc/Renderscript/RSCompilerDriver.h>
#include <bcc/Renderscript/RSExecutable.h>
#include <bcc/Renderscript/RSInfo.h>
+#endif
#include "rsScript.h"
#include "rsScriptGroup.h"