Add RS intrinsic tests for Convolve.
Change-Id: I5321b7c54e076f5fc338ea634dd699bad663e65c
diff --git a/tests/src/android/renderscript/cts/intrinsic_colormatrix.rs b/tests/src/android/renderscript/cts/intrinsic_colormatrix.rs
new file mode 100644
index 0000000..1cab00b
--- /dev/null
+++ b/tests/src/android/renderscript/cts/intrinsic_colormatrix.rs
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "shared.rsh"
+
+static rs_matrix4x4 Mat; /* matrix applied by every cvt_in() overload; written by init() and setMatrix() */
+
+int gFormatIn; /* NOTE(review): never read in this script - confirm it is still needed */
+int gFormatOut; /* NOTE(review): never read in this script - confirm it is still needed */
+float4 gAdd; /* zeroed by init(); NOTE(review): never added to any result in this script - confirm */
+
+
+void init() {
+    gAdd = 0.f;                  /* clear every lane of the additive term */
+    rsMatrixLoadIdentity(&Mat);  /* start from the identity matrix */
+}
+
+void setMatrix(rs_matrix4x4 m) {
+    rsMatrixLoad(&Mat, &m); /* copy the caller's matrix into Mat, which every cvt_in() overload reads */
+}
+
+void test(rs_matrix4x4 m, float4 add, int formatIn, int formatOut) {
+ /* NOTE(review): empty body - none of the parameters are used; looks like a placeholder, confirm. */
+}
+
+static float4 __attribute__((overloadable)) cvt_in(uchar4 in) {
+    /* Scale all four channels by 1/255, then transform by Mat. */
+    const float4 scaled = convert_float4(in) * (1.f / 255.f);
+    return rsMatrixMultiply(&Mat, scaled);
+}
+static float4 __attribute__((overloadable)) cvt_in(uchar3 in) {
+    /* Widen to float4 with w = 0, scale by 1/255, then transform by Mat. */
+    const float4 widened = {in.x, in.y, in.z, 0.f};
+    return rsMatrixMultiply(&Mat, widened * (1.f / 255.f));
+}
+static float4 __attribute__((overloadable)) cvt_in(uchar2 in) {
+    /* Widen to float4 with z = w = 0, scale by 1/255, then transform by Mat. */
+    const float4 widened = {in.x, in.y, 0.f, 0.f};
+    return rsMatrixMultiply(&Mat, widened * (1.f / 255.f));
+}
+static float4 __attribute__((overloadable)) cvt_in(uchar in) {
+    /* Place the value in lane x (other lanes 0), scale by 1/255, then transform by Mat. */
+    const float4 widened = {in, 0.f, 0.f, 0.f};
+    return rsMatrixMultiply(&Mat, widened * (1.f / 255.f));
+}
+static float4 __attribute__((overloadable)) cvt_in(float4 in) {
+    /* Float input is passed through unscaled and transformed by Mat. */
+    return rsMatrixMultiply(&Mat, in);
+}
+static float4 __attribute__((overloadable)) cvt_in(float3 in) {
+    const float4 widened = {in.x, in.y, in.z, 0.f}; /* pad w with 0; no scaling for float input */
+    return rsMatrixMultiply(&Mat, widened);
+}
+static float4 __attribute__((overloadable)) cvt_in(float2 in) {
+    const float4 widened = {in.x, in.y, 0.f, 0.f}; /* pad z and w with 0; no scaling for float input */
+    return rsMatrixMultiply(&Mat, widened);
+}
+static float4 __attribute__((overloadable)) cvt_in(float in) {
+    const float4 widened = {in, 0.f, 0.f, 0.f}; /* value in lane x, other lanes 0; no scaling for float input */
+    return rsMatrixMultiply(&Mat, widened);
+}
+
+
+static uchar4 cvt_out_uchar4(float4 f) {
+    /* Clamp each lane to [0, 255.5], then narrow. NOTE(review): cvt_in() divides uchar input by 255 but nothing rescales here - confirm. */
+    return convert_uchar4(clamp(f, 0.f, 255.5f));
+}
+static uchar3 cvt_out_uchar3(float4 f) {
+    /* Clamp lanes x, y, z to [0, 255.5], then narrow; w is dropped. NOTE(review): no x255 rescale although cvt_in() divides uchar input by 255 - confirm. */
+    return convert_uchar3(clamp(f.xyz, 0.f, 255.5f));
+}
+static uchar2 cvt_out_uchar2(float4 f) {
+    /* Clamp lanes x, y to [0, 255.5], then narrow; z and w are dropped. NOTE(review): no x255 rescale although cvt_in() divides uchar input by 255 - confirm. */
+    return convert_uchar2(clamp(f.xy, 0.f, 255.5f));
+}
+static uchar cvt_out_uchar(float4 f) {
+    /* Only lane x is returned, so only x is clamped to [0, 255.5]. NOTE(review): no x255 rescale although cvt_in() divides uchar input by 255 - confirm. */
+    return clamp(f.x, 0.f, 255.5f);
+}
+static float4 cvt_out_float4(float4 f) { /* float4 output: the value is returned unchanged (no clamp) */
+ return f;
+}
+static float3 cvt_out_float3(float4 f) { /* float3 output: lanes x, y, z unchanged (no clamp); w is dropped */
+ return f.xyz;
+}
+static float2 cvt_out_float2(float4 f) { /* float2 output: lanes x, y unchanged (no clamp); z and w are dropped */
+ return f.xy;
+}
+static float cvt_out_float(float4 f) { /* float output: lane x unchanged (no clamp); y, z, w are dropped */
+ return f.x;
+}
+
+#define KERN(tin, tout) \
+/* Emits kernel k_<tin>_<tout>: cvt_in() on the input element, then cvt_out_<tout>(). */ \
+tout __attribute__((kernel)) k_##tin##_##tout(tin in) { \
+    return cvt_out_##tout(cvt_in(in)); \
+}
+
+#define KERN2(in_t) /* one KERN per output type for input type in_t */ \
+KERN(in_t, uchar4) \
+KERN(in_t, uchar3) \
+KERN(in_t, uchar2) \
+KERN(in_t, uchar) \
+KERN(in_t, float4) \
+KERN(in_t, float3) \
+KERN(in_t, float2) \
+KERN(in_t, float)
+
+KERN2(uchar4) /* 8 input types x 8 output types: defines the 64 kernels k_<in>_<out> */
+KERN2(uchar3)
+KERN2(uchar2)
+KERN2(uchar)
+KERN2(float4)
+KERN2(float3)
+KERN2(float2)
+KERN2(float)
+
diff --git a/tests/src/android/renderscript/cts/intrinsic_convolve3x3.rs b/tests/src/android/renderscript/cts/intrinsic_convolve3x3.rs
new file mode 100644
index 0000000..77da230
--- /dev/null
+++ b/tests/src/android/renderscript/cts/intrinsic_convolve3x3.rs
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "shared.rsh"
+
+int32_t gWidth; /* kernels cap x+1 at gWidth-1; presumably the X size of gIn - confirm against the host code */
+int32_t gHeight; /* kernels cap y+1 at gHeight-1; presumably the Y size of gIn - confirm against the host code */
+rs_allocation gIn; /* allocation every convolve_* kernel in this script samples from */
+
+float gCoeffs[9]; /* the nine weights; gCoeffs[0] multiplies the (x+1, y+1) sample and gCoeffs[8] the (x-1, y-1) sample */
+
+uchar4 __attribute__((kernel)) convolve_U4(uint32_t x, uint32_t y) { /* 3x3 weighted sum of the uchar4 samples of gIn around (x, y) */
+ uint32_t x1 = min((int32_t)x+1, gWidth-1); /* x1/y1: x+1 / y+1, capped at gWidth-1 / gHeight-1 */
+ uint32_t x2 = max((int32_t)x-1, 0); /* x2/y2: x-1 / y-1, floored at 0 */
+ uint32_t y1 = min((int32_t)y+1, gHeight-1);
+ uint32_t y2 = max((int32_t)y-1, 0);
+ /* pRC: R = 0/1/2 samples at y1/y/y2, C = 0/1/2 samples at x1/x/x2; pRC is weighted by gCoeffs[3*R + C]. */
+ float4 p00 = convert_float4(rsGetElementAt_uchar4(gIn, x1, y1));
+ float4 p01 = convert_float4(rsGetElementAt_uchar4(gIn, x, y1));
+ float4 p02 = convert_float4(rsGetElementAt_uchar4(gIn, x2, y1));
+ float4 p10 = convert_float4(rsGetElementAt_uchar4(gIn, x1, y));
+ float4 p11 = convert_float4(rsGetElementAt_uchar4(gIn, x, y));
+ float4 p12 = convert_float4(rsGetElementAt_uchar4(gIn, x2, y));
+ float4 p20 = convert_float4(rsGetElementAt_uchar4(gIn, x1, y2));
+ float4 p21 = convert_float4(rsGetElementAt_uchar4(gIn, x, y2));
+ float4 p22 = convert_float4(rsGetElementAt_uchar4(gIn, x2, y2));
+ p00 *= gCoeffs[0];
+ p01 *= gCoeffs[1];
+ p02 *= gCoeffs[2];
+ p10 *= gCoeffs[3];
+ p11 *= gCoeffs[4];
+ p12 *= gCoeffs[5];
+ p20 *= gCoeffs[6];
+ p21 *= gCoeffs[7];
+ p22 *= gCoeffs[8];
+ /* Pairwise accumulation into p20; keep this order, float addition is not associative. */
+ p00 += p01;
+ p02 += p10;
+ p11 += p12;
+ p20 += p21;
+
+ p22 += p00;
+ p02 += p11;
+
+ p20 += p22;
+ p20 += p02;
+ p20 += 0.5f; /* added before the clamp and integer conversion; presumably to round to nearest - confirm */
+
+ p20 = clamp(p20, 0.f, 255.f);
+ return convert_uchar4(p20);
+}
+
+uchar3 __attribute__((kernel)) convolve_U3(uint32_t x, uint32_t y) { /* 3x3 weighted sum of the uchar3 samples of gIn around (x, y) */
+ uint32_t x1 = min((int32_t)x+1, gWidth-1); /* x1/y1: x+1 / y+1, capped at gWidth-1 / gHeight-1 */
+ uint32_t x2 = max((int32_t)x-1, 0); /* x2/y2: x-1 / y-1, floored at 0 */
+ uint32_t y1 = min((int32_t)y+1, gHeight-1);
+ uint32_t y2 = max((int32_t)y-1, 0);
+ /* pRC: R = 0/1/2 samples at y1/y/y2, C = 0/1/2 samples at x1/x/x2; pRC is weighted by gCoeffs[3*R + C]. */
+ float3 p00 = convert_float3(rsGetElementAt_uchar3(gIn, x1, y1));
+ float3 p01 = convert_float3(rsGetElementAt_uchar3(gIn, x, y1));
+ float3 p02 = convert_float3(rsGetElementAt_uchar3(gIn, x2, y1));
+ float3 p10 = convert_float3(rsGetElementAt_uchar3(gIn, x1, y));
+ float3 p11 = convert_float3(rsGetElementAt_uchar3(gIn, x, y));
+ float3 p12 = convert_float3(rsGetElementAt_uchar3(gIn, x2, y));
+ float3 p20 = convert_float3(rsGetElementAt_uchar3(gIn, x1, y2));
+ float3 p21 = convert_float3(rsGetElementAt_uchar3(gIn, x, y2));
+ float3 p22 = convert_float3(rsGetElementAt_uchar3(gIn, x2, y2));
+ p00 *= gCoeffs[0];
+ p01 *= gCoeffs[1];
+ p02 *= gCoeffs[2];
+ p10 *= gCoeffs[3];
+ p11 *= gCoeffs[4];
+ p12 *= gCoeffs[5];
+ p20 *= gCoeffs[6];
+ p21 *= gCoeffs[7];
+ p22 *= gCoeffs[8];
+ /* Pairwise accumulation into p20; keep this order, float addition is not associative. */
+ p00 += p01;
+ p02 += p10;
+ p11 += p12;
+ p20 += p21;
+
+ p22 += p00;
+ p02 += p11;
+
+ p20 += p22;
+ p20 += p02;
+ p20 += 0.5f; /* added before the clamp and integer conversion; presumably to round to nearest - confirm */
+
+ p20 = clamp(p20, 0.f, 255.f);
+ return convert_uchar3(p20);
+}
+
+uchar2 __attribute__((kernel)) convolve_U2(uint32_t x, uint32_t y) { /* 3x3 weighted sum of the uchar2 samples of gIn around (x, y) */
+ uint32_t x1 = min((int32_t)x+1, gWidth-1); /* x1/y1: x+1 / y+1, capped at gWidth-1 / gHeight-1 */
+ uint32_t x2 = max((int32_t)x-1, 0); /* x2/y2: x-1 / y-1, floored at 0 */
+ uint32_t y1 = min((int32_t)y+1, gHeight-1);
+ uint32_t y2 = max((int32_t)y-1, 0);
+ /* pRC: R = 0/1/2 samples at y1/y/y2, C = 0/1/2 samples at x1/x/x2; pRC is weighted by gCoeffs[3*R + C]. */
+ float2 p00 = convert_float2(rsGetElementAt_uchar2(gIn, x1, y1));
+ float2 p01 = convert_float2(rsGetElementAt_uchar2(gIn, x, y1));
+ float2 p02 = convert_float2(rsGetElementAt_uchar2(gIn, x2, y1));
+ float2 p10 = convert_float2(rsGetElementAt_uchar2(gIn, x1, y));
+ float2 p11 = convert_float2(rsGetElementAt_uchar2(gIn, x, y));
+ float2 p12 = convert_float2(rsGetElementAt_uchar2(gIn, x2, y));
+ float2 p20 = convert_float2(rsGetElementAt_uchar2(gIn, x1, y2));
+ float2 p21 = convert_float2(rsGetElementAt_uchar2(gIn, x, y2));
+ float2 p22 = convert_float2(rsGetElementAt_uchar2(gIn, x2, y2));
+ p00 *= gCoeffs[0];
+ p01 *= gCoeffs[1];
+ p02 *= gCoeffs[2];
+ p10 *= gCoeffs[3];
+ p11 *= gCoeffs[4];
+ p12 *= gCoeffs[5];
+ p20 *= gCoeffs[6];
+ p21 *= gCoeffs[7];
+ p22 *= gCoeffs[8];
+ /* Pairwise accumulation into p20; keep this order, float addition is not associative. */
+ p00 += p01;
+ p02 += p10;
+ p11 += p12;
+ p20 += p21;
+
+ p22 += p00;
+ p02 += p11;
+
+ p20 += p22;
+ p20 += p02;
+ p20 += 0.5f; /* added before the clamp and integer conversion; presumably to round to nearest - confirm */
+
+ p20 = clamp(p20, 0.f, 255.f);
+ return convert_uchar2(p20);
+}
+
+uchar __attribute__((kernel)) convolve_U1(uint32_t x, uint32_t y) { /* 3x3 weighted sum of the uchar samples of gIn around (x, y) */
+ uint32_t x1 = min((int32_t)x+1, gWidth-1); /* x1/y1: x+1 / y+1, capped at gWidth-1 / gHeight-1 */
+ uint32_t x2 = max((int32_t)x-1, 0); /* x2/y2: x-1 / y-1, floored at 0 */
+ uint32_t y1 = min((int32_t)y+1, gHeight-1);
+ uint32_t y2 = max((int32_t)y-1, 0);
+ /* pRC: R = 0/1/2 samples at y1/y/y2, C = 0/1/2 samples at x1/x/x2; pRC is weighted by gCoeffs[3*R + C]. */
+ float p00 = rsGetElementAt_uchar(gIn, x1, y1);
+ float p01 = rsGetElementAt_uchar(gIn, x, y1);
+ float p02 = rsGetElementAt_uchar(gIn, x2, y1);
+ float p10 = rsGetElementAt_uchar(gIn, x1, y);
+ float p11 = rsGetElementAt_uchar(gIn, x, y);
+ float p12 = rsGetElementAt_uchar(gIn, x2, y);
+ float p20 = rsGetElementAt_uchar(gIn, x1, y2);
+ float p21 = rsGetElementAt_uchar(gIn, x, y2);
+ float p22 = rsGetElementAt_uchar(gIn, x2, y2);
+ p00 *= gCoeffs[0];
+ p01 *= gCoeffs[1];
+ p02 *= gCoeffs[2];
+ p10 *= gCoeffs[3];
+ p11 *= gCoeffs[4];
+ p12 *= gCoeffs[5];
+ p20 *= gCoeffs[6];
+ p21 *= gCoeffs[7];
+ p22 *= gCoeffs[8];
+ /* Pairwise accumulation into p20; keep this order, float addition is not associative. */
+ p00 += p01;
+ p02 += p10;
+ p11 += p12;
+ p20 += p21;
+
+ p22 += p00;
+ p02 += p11;
+
+ p20 += p22;
+ p20 += p02;
+ p20 += 0.5f; /* added before the clamp and integer cast; presumably to round to nearest - confirm */
+
+ p20 = clamp(p20, 0.f, 255.f);
+ return (uchar)p20;
+}
+
+float4 __attribute__((kernel)) convolve_F4(uint32_t x, uint32_t y) { /* 3x3 weighted sum of the float4 samples of gIn around (x, y); no clamp or rounding */
+ uint32_t x1 = min((int32_t)x+1, gWidth-1); /* x1/y1: x+1 / y+1, capped at gWidth-1 / gHeight-1 */
+ uint32_t x2 = max((int32_t)x-1, 0); /* x2/y2: x-1 / y-1, floored at 0 */
+ uint32_t y1 = min((int32_t)y+1, gHeight-1);
+ uint32_t y2 = max((int32_t)y-1, 0);
+ /* pRC: R = 0/1/2 samples at y1/y/y2, C = 0/1/2 samples at x1/x/x2; pRC is weighted by gCoeffs[3*R + C]. */
+ float4 p00 = rsGetElementAt_float4(gIn, x1, y1) * gCoeffs[0];
+ float4 p01 = rsGetElementAt_float4(gIn, x, y1) * gCoeffs[1];
+ float4 p02 = rsGetElementAt_float4(gIn, x2, y1) * gCoeffs[2];
+ float4 p10 = rsGetElementAt_float4(gIn, x1, y) * gCoeffs[3];
+ float4 p11 = rsGetElementAt_float4(gIn, x, y) * gCoeffs[4];
+ float4 p12 = rsGetElementAt_float4(gIn, x2, y) * gCoeffs[5];
+ float4 p20 = rsGetElementAt_float4(gIn, x1, y2) * gCoeffs[6];
+ float4 p21 = rsGetElementAt_float4(gIn, x, y2) * gCoeffs[7];
+ float4 p22 = rsGetElementAt_float4(gIn, x2, y2) * gCoeffs[8];
+ /* Pairwise accumulation into p20; keep this order, float addition is not associative. */
+ p00 += p01;
+ p02 += p10;
+ p11 += p12;
+ p20 += p21;
+
+ p22 += p00;
+ p02 += p11;
+
+ p20 += p22;
+ p20 += p02;
+ return p20;
+}
+
+float3 __attribute__((kernel)) convolve_F3(uint32_t x, uint32_t y) { /* 3x3 weighted sum of the float3 samples of gIn around (x, y); no clamp or rounding */
+ uint32_t x1 = min((int32_t)x+1, gWidth-1); /* x1/y1: x+1 / y+1, capped at gWidth-1 / gHeight-1 */
+ uint32_t x2 = max((int32_t)x-1, 0); /* x2/y2: x-1 / y-1, floored at 0 */
+ uint32_t y1 = min((int32_t)y+1, gHeight-1);
+ uint32_t y2 = max((int32_t)y-1, 0);
+ /* pRC: R = 0/1/2 samples at y1/y/y2, C = 0/1/2 samples at x1/x/x2; pRC is weighted by gCoeffs[3*R + C]. */
+ float3 p00 = rsGetElementAt_float3(gIn, x1, y1) * gCoeffs[0];
+ float3 p01 = rsGetElementAt_float3(gIn, x, y1) * gCoeffs[1];
+ float3 p02 = rsGetElementAt_float3(gIn, x2, y1) * gCoeffs[2];
+ float3 p10 = rsGetElementAt_float3(gIn, x1, y) * gCoeffs[3];
+ float3 p11 = rsGetElementAt_float3(gIn, x, y) * gCoeffs[4];
+ float3 p12 = rsGetElementAt_float3(gIn, x2, y) * gCoeffs[5];
+ float3 p20 = rsGetElementAt_float3(gIn, x1, y2) * gCoeffs[6];
+ float3 p21 = rsGetElementAt_float3(gIn, x, y2) * gCoeffs[7];
+ float3 p22 = rsGetElementAt_float3(gIn, x2, y2) * gCoeffs[8];
+ /* Pairwise accumulation into p20; keep this order, float addition is not associative. */
+ p00 += p01;
+ p02 += p10;
+ p11 += p12;
+ p20 += p21;
+
+ p22 += p00;
+ p02 += p11;
+
+ p20 += p22;
+ p20 += p02;
+ return p20;
+}
+
+float2 __attribute__((kernel)) convolve_F2(uint32_t x, uint32_t y) { /* 3x3 weighted sum of the float2 samples of gIn around (x, y); no clamp or rounding */
+ uint32_t x1 = min((int32_t)x+1, gWidth-1); /* x1/y1: x+1 / y+1, capped at gWidth-1 / gHeight-1 */
+ uint32_t x2 = max((int32_t)x-1, 0); /* x2/y2: x-1 / y-1, floored at 0 */
+ uint32_t y1 = min((int32_t)y+1, gHeight-1);
+ uint32_t y2 = max((int32_t)y-1, 0);
+ /* pRC: R = 0/1/2 samples at y1/y/y2, C = 0/1/2 samples at x1/x/x2; pRC is weighted by gCoeffs[3*R + C]. */
+ float2 p00 = rsGetElementAt_float2(gIn, x1, y1) * gCoeffs[0];
+ float2 p01 = rsGetElementAt_float2(gIn, x, y1) * gCoeffs[1];
+ float2 p02 = rsGetElementAt_float2(gIn, x2, y1) * gCoeffs[2];
+ float2 p10 = rsGetElementAt_float2(gIn, x1, y) * gCoeffs[3];
+ float2 p11 = rsGetElementAt_float2(gIn, x, y) * gCoeffs[4];
+ float2 p12 = rsGetElementAt_float2(gIn, x2, y) * gCoeffs[5];
+ float2 p20 = rsGetElementAt_float2(gIn, x1, y2) * gCoeffs[6];
+ float2 p21 = rsGetElementAt_float2(gIn, x, y2) * gCoeffs[7];
+ float2 p22 = rsGetElementAt_float2(gIn, x2, y2) * gCoeffs[8];
+ /* Pairwise accumulation into p20; keep this order, float addition is not associative. */
+ p00 += p01;
+ p02 += p10;
+ p11 += p12;
+ p20 += p21;
+
+ p22 += p00;
+ p02 += p11;
+
+ p20 += p22;
+ p20 += p02;
+ return p20;
+}
+
+float __attribute__((kernel)) convolve_F1(uint32_t x, uint32_t y) { /* 3x3 weighted sum of the float samples of gIn around (x, y); no clamp or rounding */
+ uint32_t x1 = min((int32_t)x+1, gWidth-1); /* x1/y1: x+1 / y+1, capped at gWidth-1 / gHeight-1 */
+ uint32_t x2 = max((int32_t)x-1, 0); /* x2/y2: x-1 / y-1, floored at 0 */
+ uint32_t y1 = min((int32_t)y+1, gHeight-1);
+ uint32_t y2 = max((int32_t)y-1, 0);
+ /* pRC: R = 0/1/2 samples at y1/y/y2, C = 0/1/2 samples at x1/x/x2; pRC is weighted by gCoeffs[3*R + C]. */
+ float p00 = rsGetElementAt_float(gIn, x1, y1) * gCoeffs[0];
+ float p01 = rsGetElementAt_float(gIn, x, y1) * gCoeffs[1];
+ float p02 = rsGetElementAt_float(gIn, x2, y1) * gCoeffs[2];
+ float p10 = rsGetElementAt_float(gIn, x1, y) * gCoeffs[3];
+ float p11 = rsGetElementAt_float(gIn, x, y) * gCoeffs[4];
+ float p12 = rsGetElementAt_float(gIn, x2, y) * gCoeffs[5];
+ float p20 = rsGetElementAt_float(gIn, x1, y2) * gCoeffs[6];
+ float p21 = rsGetElementAt_float(gIn, x, y2) * gCoeffs[7];
+ float p22 = rsGetElementAt_float(gIn, x2, y2) * gCoeffs[8];
+ /* Pairwise accumulation into p20; keep this order, float addition is not associative. */
+ p00 += p01;
+ p02 += p10;
+ p11 += p12;
+ p20 += p21;
+
+ p22 += p00;
+ p02 += p11;
+
+ p20 += p22;
+ p20 += p02;
+ return p20;
+}
+
+
diff --git a/tests/src/android/renderscript/cts/intrinsic_convolve5x5.rs b/tests/src/android/renderscript/cts/intrinsic_convolve5x5.rs
new file mode 100644
index 0000000..9f9aa2b
--- /dev/null
+++ b/tests/src/android/renderscript/cts/intrinsic_convolve5x5.rs
@@ -0,0 +1,398 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "shared.rsh"
+
+
+int32_t gWidth; /* kernels cap x+1 and x+2 at gWidth-1; presumably the X size of gIn - confirm against the host code */
+int32_t gHeight; /* kernels cap y+1 and y+2 at gHeight-1; presumably the Y size of gIn - confirm against the host code */
+rs_allocation gIn; /* allocation every convolve_* kernel in this script samples from */
+
+float gCoeffs[25]; /* gCoeffs[0] multiplies the (x-2, y-2) sample, gCoeffs[24] the (x+2, y+2) sample; NOTE(review): intrinsic_convolve3x3.rs maps index 0 to (x+1, y+1) - confirm */
+
+uchar4 __attribute__((kernel)) convolve_U4(uint32_t x, uint32_t y) { /* 5x5 weighted sum of the uchar4 samples of gIn around (x, y) */
+ uint32_t x0 = max((int32_t)x-2, 0); /* x0/x1 and y0/y1: 2 and 1 steps back, floored at 0 */
+ uint32_t x1 = max((int32_t)x-1, 0);
+ uint32_t x2 = x;
+ uint32_t x3 = min((int32_t)x+1, gWidth-1); /* x3/x4 and y3/y4: 1 and 2 steps forward, capped at gWidth-1 / gHeight-1 */
+ uint32_t x4 = min((int32_t)x+2, gWidth-1);
+
+ uint32_t y0 = max((int32_t)y-2, 0);
+ uint32_t y1 = max((int32_t)y-1, 0);
+ uint32_t y2 = y;
+ uint32_t y3 = min((int32_t)y+1, gHeight-1);
+ uint32_t y4 = min((int32_t)y+2, gHeight-1);
+ /* pK accumulates the samples at y = yK: x0..x4 weighted by gCoeffs[5*K] .. gCoeffs[5*K + 4]. */
+ float4 p0 = convert_float4(rsGetElementAt_uchar4(gIn, x0, y0)) * gCoeffs[0]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x1, y0)) * gCoeffs[1]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x2, y0)) * gCoeffs[2]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x3, y0)) * gCoeffs[3]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x4, y0)) * gCoeffs[4];
+
+ float4 p1 = convert_float4(rsGetElementAt_uchar4(gIn, x0, y1)) * gCoeffs[5]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x1, y1)) * gCoeffs[6]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x2, y1)) * gCoeffs[7]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x3, y1)) * gCoeffs[8]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x4, y1)) * gCoeffs[9];
+
+ float4 p2 = convert_float4(rsGetElementAt_uchar4(gIn, x0, y2)) * gCoeffs[10]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x1, y2)) * gCoeffs[11]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x2, y2)) * gCoeffs[12]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x3, y2)) * gCoeffs[13]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x4, y2)) * gCoeffs[14];
+
+ float4 p3 = convert_float4(rsGetElementAt_uchar4(gIn, x0, y3)) * gCoeffs[15]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x1, y3)) * gCoeffs[16]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x2, y3)) * gCoeffs[17]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x3, y3)) * gCoeffs[18]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x4, y3)) * gCoeffs[19];
+
+ float4 p4 = convert_float4(rsGetElementAt_uchar4(gIn, x0, y4)) * gCoeffs[20]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x1, y4)) * gCoeffs[21]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x2, y4)) * gCoeffs[22]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x3, y4)) * gCoeffs[23]
+ + convert_float4(rsGetElementAt_uchar4(gIn, x4, y4)) * gCoeffs[24];
+
+ p0 = clamp(p0 + p1 + p2 + p3 + p4, 0.f, 255.f); /* NOTE(review): no +0.5f before the clamp, unlike intrinsic_convolve3x3.rs - confirm */
+ return convert_uchar4(p0);
+}
+
+uchar3 __attribute__((kernel)) convolve_U3(uint32_t x, uint32_t y) { /* 5x5 weighted sum of the uchar3 samples of gIn around (x, y) */
+ uint32_t x0 = max((int32_t)x-2, 0); /* x0/x1 and y0/y1: 2 and 1 steps back, floored at 0 */
+ uint32_t x1 = max((int32_t)x-1, 0);
+ uint32_t x2 = x;
+ uint32_t x3 = min((int32_t)x+1, gWidth-1); /* x3/x4 and y3/y4: 1 and 2 steps forward, capped at gWidth-1 / gHeight-1 */
+ uint32_t x4 = min((int32_t)x+2, gWidth-1);
+
+ uint32_t y0 = max((int32_t)y-2, 0);
+ uint32_t y1 = max((int32_t)y-1, 0);
+ uint32_t y2 = y;
+ uint32_t y3 = min((int32_t)y+1, gHeight-1);
+ uint32_t y4 = min((int32_t)y+2, gHeight-1);
+ /* pK accumulates the samples at y = yK: x0..x4 weighted by gCoeffs[5*K] .. gCoeffs[5*K + 4]. */
+ float3 p0 = convert_float3(rsGetElementAt_uchar3(gIn, x0, y0)) * gCoeffs[0]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x1, y0)) * gCoeffs[1]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x2, y0)) * gCoeffs[2]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x3, y0)) * gCoeffs[3]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x4, y0)) * gCoeffs[4];
+
+ float3 p1 = convert_float3(rsGetElementAt_uchar3(gIn, x0, y1)) * gCoeffs[5]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x1, y1)) * gCoeffs[6]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x2, y1)) * gCoeffs[7]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x3, y1)) * gCoeffs[8]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x4, y1)) * gCoeffs[9];
+
+ float3 p2 = convert_float3(rsGetElementAt_uchar3(gIn, x0, y2)) * gCoeffs[10]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x1, y2)) * gCoeffs[11]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x2, y2)) * gCoeffs[12]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x3, y2)) * gCoeffs[13]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x4, y2)) * gCoeffs[14];
+
+ float3 p3 = convert_float3(rsGetElementAt_uchar3(gIn, x0, y3)) * gCoeffs[15]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x1, y3)) * gCoeffs[16]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x2, y3)) * gCoeffs[17]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x3, y3)) * gCoeffs[18]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x4, y3)) * gCoeffs[19];
+
+ float3 p4 = convert_float3(rsGetElementAt_uchar3(gIn, x0, y4)) * gCoeffs[20]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x1, y4)) * gCoeffs[21]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x2, y4)) * gCoeffs[22]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x3, y4)) * gCoeffs[23]
+ + convert_float3(rsGetElementAt_uchar3(gIn, x4, y4)) * gCoeffs[24];
+
+ p0 = clamp(p0 + p1 + p2 + p3 + p4, 0.f, 255.f); /* NOTE(review): no +0.5f before the clamp, unlike intrinsic_convolve3x3.rs - confirm */
+ return convert_uchar3(p0);
+}
+
+uchar2 __attribute__((kernel)) convolve_U2(uint32_t x, uint32_t y) { /* 5x5 weighted sum of the uchar2 samples of gIn around (x, y) */
+ uint32_t x0 = max((int32_t)x-2, 0); /* x0/x1 and y0/y1: 2 and 1 steps back, floored at 0 */
+ uint32_t x1 = max((int32_t)x-1, 0);
+ uint32_t x2 = x;
+ uint32_t x3 = min((int32_t)x+1, gWidth-1); /* x3/x4 and y3/y4: 1 and 2 steps forward, capped at gWidth-1 / gHeight-1 */
+ uint32_t x4 = min((int32_t)x+2, gWidth-1);
+
+ uint32_t y0 = max((int32_t)y-2, 0);
+ uint32_t y1 = max((int32_t)y-1, 0);
+ uint32_t y2 = y;
+ uint32_t y3 = min((int32_t)y+1, gHeight-1);
+ uint32_t y4 = min((int32_t)y+2, gHeight-1);
+ /* pK accumulates the samples at y = yK: x0..x4 weighted by gCoeffs[5*K] .. gCoeffs[5*K + 4]. */
+ float2 p0 = convert_float2(rsGetElementAt_uchar2(gIn, x0, y0)) * gCoeffs[0]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x1, y0)) * gCoeffs[1]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x2, y0)) * gCoeffs[2]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x3, y0)) * gCoeffs[3]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x4, y0)) * gCoeffs[4];
+
+ float2 p1 = convert_float2(rsGetElementAt_uchar2(gIn, x0, y1)) * gCoeffs[5]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x1, y1)) * gCoeffs[6]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x2, y1)) * gCoeffs[7]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x3, y1)) * gCoeffs[8]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x4, y1)) * gCoeffs[9];
+
+ float2 p2 = convert_float2(rsGetElementAt_uchar2(gIn, x0, y2)) * gCoeffs[10]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x1, y2)) * gCoeffs[11]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x2, y2)) * gCoeffs[12]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x3, y2)) * gCoeffs[13]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x4, y2)) * gCoeffs[14];
+
+ float2 p3 = convert_float2(rsGetElementAt_uchar2(gIn, x0, y3)) * gCoeffs[15]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x1, y3)) * gCoeffs[16]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x2, y3)) * gCoeffs[17]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x3, y3)) * gCoeffs[18]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x4, y3)) * gCoeffs[19];
+
+ float2 p4 = convert_float2(rsGetElementAt_uchar2(gIn, x0, y4)) * gCoeffs[20]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x1, y4)) * gCoeffs[21]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x2, y4)) * gCoeffs[22]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x3, y4)) * gCoeffs[23]
+ + convert_float2(rsGetElementAt_uchar2(gIn, x4, y4)) * gCoeffs[24];
+
+ p0 = clamp(p0 + p1 + p2 + p3 + p4, 0.f, 255.f); /* NOTE(review): no +0.5f before the clamp, unlike intrinsic_convolve3x3.rs - confirm */
+ return convert_uchar2(p0);
+}
+
+uchar __attribute__((kernel)) convolve_U1(uint32_t x, uint32_t y) { /* 5x5 weighted sum of the uchar samples of gIn around (x, y) */
+ uint32_t x0 = max((int32_t)x-2, 0); /* x0/x1 and y0/y1: 2 and 1 steps back, floored at 0 */
+ uint32_t x1 = max((int32_t)x-1, 0);
+ uint32_t x2 = x;
+ uint32_t x3 = min((int32_t)x+1, gWidth-1); /* x3/x4 and y3/y4: 1 and 2 steps forward, capped at gWidth-1 / gHeight-1 */
+ uint32_t x4 = min((int32_t)x+2, gWidth-1);
+
+ uint32_t y0 = max((int32_t)y-2, 0);
+ uint32_t y1 = max((int32_t)y-1, 0);
+ uint32_t y2 = y;
+ uint32_t y3 = min((int32_t)y+1, gHeight-1);
+ uint32_t y4 = min((int32_t)y+2, gHeight-1);
+ /* pK accumulates the samples at y = yK: x0..x4 weighted by gCoeffs[5*K] .. gCoeffs[5*K + 4]. */
+ float p0 = (float)(rsGetElementAt_uchar(gIn, x0, y0)) * gCoeffs[0]
+ + (float)(rsGetElementAt_uchar(gIn, x1, y0)) * gCoeffs[1]
+ + (float)(rsGetElementAt_uchar(gIn, x2, y0)) * gCoeffs[2]
+ + (float)(rsGetElementAt_uchar(gIn, x3, y0)) * gCoeffs[3]
+ + (float)(rsGetElementAt_uchar(gIn, x4, y0)) * gCoeffs[4];
+
+ float p1 = (float)(rsGetElementAt_uchar(gIn, x0, y1)) * gCoeffs[5]
+ + (float)(rsGetElementAt_uchar(gIn, x1, y1)) * gCoeffs[6]
+ + (float)(rsGetElementAt_uchar(gIn, x2, y1)) * gCoeffs[7]
+ + (float)(rsGetElementAt_uchar(gIn, x3, y1)) * gCoeffs[8]
+ + (float)(rsGetElementAt_uchar(gIn, x4, y1)) * gCoeffs[9];
+
+ float p2 = (float)(rsGetElementAt_uchar(gIn, x0, y2)) * gCoeffs[10]
+ + (float)(rsGetElementAt_uchar(gIn, x1, y2)) * gCoeffs[11]
+ + (float)(rsGetElementAt_uchar(gIn, x2, y2)) * gCoeffs[12]
+ + (float)(rsGetElementAt_uchar(gIn, x3, y2)) * gCoeffs[13]
+ + (float)(rsGetElementAt_uchar(gIn, x4, y2)) * gCoeffs[14];
+
+ float p3 = (float)(rsGetElementAt_uchar(gIn, x0, y3)) * gCoeffs[15]
+ + (float)(rsGetElementAt_uchar(gIn, x1, y3)) * gCoeffs[16]
+ + (float)(rsGetElementAt_uchar(gIn, x2, y3)) * gCoeffs[17]
+ + (float)(rsGetElementAt_uchar(gIn, x3, y3)) * gCoeffs[18]
+ + (float)(rsGetElementAt_uchar(gIn, x4, y3)) * gCoeffs[19];
+
+ float p4 = (float)(rsGetElementAt_uchar(gIn, x0, y4)) * gCoeffs[20]
+ + (float)(rsGetElementAt_uchar(gIn, x1, y4)) * gCoeffs[21]
+ + (float)(rsGetElementAt_uchar(gIn, x2, y4)) * gCoeffs[22]
+ + (float)(rsGetElementAt_uchar(gIn, x3, y4)) * gCoeffs[23]
+ + (float)(rsGetElementAt_uchar(gIn, x4, y4)) * gCoeffs[24];
+
+ return clamp(p0 + p1 + p2 + p3 + p4, 0.f, 255.f); /* float implicitly narrowed to uchar; NOTE(review): no +0.5f, unlike intrinsic_convolve3x3.rs - confirm */
+}
+
+float4 __attribute__((kernel)) convolve_F4(uint32_t x, uint32_t y) { /* 5x5 weighted sum of the float4 samples of gIn around (x, y); no clamp or rounding */
+ uint32_t x0 = max((int32_t)x-2, 0); /* x0/x1 and y0/y1: 2 and 1 steps back, floored at 0 */
+ uint32_t x1 = max((int32_t)x-1, 0);
+ uint32_t x2 = x;
+ uint32_t x3 = min((int32_t)x+1, gWidth-1); /* x3/x4 and y3/y4: 1 and 2 steps forward, capped at gWidth-1 / gHeight-1 */
+ uint32_t x4 = min((int32_t)x+2, gWidth-1);
+
+ uint32_t y0 = max((int32_t)y-2, 0);
+ uint32_t y1 = max((int32_t)y-1, 0);
+ uint32_t y2 = y;
+ uint32_t y3 = min((int32_t)y+1, gHeight-1);
+ uint32_t y4 = min((int32_t)y+2, gHeight-1);
+ /* pK accumulates the samples at y = yK: x0..x4 weighted by gCoeffs[5*K] .. gCoeffs[5*K + 4]. */
+ float4 p0 = rsGetElementAt_float4(gIn, x0, y0) * gCoeffs[0]
+ + rsGetElementAt_float4(gIn, x1, y0) * gCoeffs[1]
+ + rsGetElementAt_float4(gIn, x2, y0) * gCoeffs[2]
+ + rsGetElementAt_float4(gIn, x3, y0) * gCoeffs[3]
+ + rsGetElementAt_float4(gIn, x4, y0) * gCoeffs[4];
+
+ float4 p1 = rsGetElementAt_float4(gIn, x0, y1) * gCoeffs[5]
+ + rsGetElementAt_float4(gIn, x1, y1) * gCoeffs[6]
+ + rsGetElementAt_float4(gIn, x2, y1) * gCoeffs[7]
+ + rsGetElementAt_float4(gIn, x3, y1) * gCoeffs[8]
+ + rsGetElementAt_float4(gIn, x4, y1) * gCoeffs[9];
+
+ float4 p2 = rsGetElementAt_float4(gIn, x0, y2) * gCoeffs[10]
+ + rsGetElementAt_float4(gIn, x1, y2) * gCoeffs[11]
+ + rsGetElementAt_float4(gIn, x2, y2) * gCoeffs[12]
+ + rsGetElementAt_float4(gIn, x3, y2) * gCoeffs[13]
+ + rsGetElementAt_float4(gIn, x4, y2) * gCoeffs[14];
+
+ float4 p3 = rsGetElementAt_float4(gIn, x0, y3) * gCoeffs[15]
+ + rsGetElementAt_float4(gIn, x1, y3) * gCoeffs[16]
+ + rsGetElementAt_float4(gIn, x2, y3) * gCoeffs[17]
+ + rsGetElementAt_float4(gIn, x3, y3) * gCoeffs[18]
+ + rsGetElementAt_float4(gIn, x4, y3) * gCoeffs[19];
+
+ float4 p4 = rsGetElementAt_float4(gIn, x0, y4) * gCoeffs[20]
+ + rsGetElementAt_float4(gIn, x1, y4) * gCoeffs[21]
+ + rsGetElementAt_float4(gIn, x2, y4) * gCoeffs[22]
+ + rsGetElementAt_float4(gIn, x3, y4) * gCoeffs[23]
+ + rsGetElementAt_float4(gIn, x4, y4) * gCoeffs[24];
+
+ return p0 + p1 + p2 + p3 + p4;
+}
+
+float3 __attribute__((kernel)) convolve_F3(uint32_t x, uint32_t y) { /* 5x5 weighted sum of the float3 samples of gIn around (x, y); no clamp or rounding */
+ uint32_t x0 = max((int32_t)x-2, 0); /* x0/x1 and y0/y1: 2 and 1 steps back, floored at 0 */
+ uint32_t x1 = max((int32_t)x-1, 0);
+ uint32_t x2 = x;
+ uint32_t x3 = min((int32_t)x+1, gWidth-1); /* x3/x4 and y3/y4: 1 and 2 steps forward, capped at gWidth-1 / gHeight-1 */
+ uint32_t x4 = min((int32_t)x+2, gWidth-1);
+
+ uint32_t y0 = max((int32_t)y-2, 0);
+ uint32_t y1 = max((int32_t)y-1, 0);
+ uint32_t y2 = y;
+ uint32_t y3 = min((int32_t)y+1, gHeight-1);
+ uint32_t y4 = min((int32_t)y+2, gHeight-1);
+ /* pK accumulates the samples at y = yK: x0..x4 weighted by gCoeffs[5*K] .. gCoeffs[5*K + 4]. */
+ float3 p0 = rsGetElementAt_float3(gIn, x0, y0) * gCoeffs[0]
+ + rsGetElementAt_float3(gIn, x1, y0) * gCoeffs[1]
+ + rsGetElementAt_float3(gIn, x2, y0) * gCoeffs[2]
+ + rsGetElementAt_float3(gIn, x3, y0) * gCoeffs[3]
+ + rsGetElementAt_float3(gIn, x4, y0) * gCoeffs[4];
+
+ float3 p1 = rsGetElementAt_float3(gIn, x0, y1) * gCoeffs[5]
+ + rsGetElementAt_float3(gIn, x1, y1) * gCoeffs[6]
+ + rsGetElementAt_float3(gIn, x2, y1) * gCoeffs[7]
+ + rsGetElementAt_float3(gIn, x3, y1) * gCoeffs[8]
+ + rsGetElementAt_float3(gIn, x4, y1) * gCoeffs[9];
+
+ float3 p2 = rsGetElementAt_float3(gIn, x0, y2) * gCoeffs[10]
+ + rsGetElementAt_float3(gIn, x1, y2) * gCoeffs[11]
+ + rsGetElementAt_float3(gIn, x2, y2) * gCoeffs[12]
+ + rsGetElementAt_float3(gIn, x3, y2) * gCoeffs[13]
+ + rsGetElementAt_float3(gIn, x4, y2) * gCoeffs[14];
+
+ float3 p3 = rsGetElementAt_float3(gIn, x0, y3) * gCoeffs[15]
+ + rsGetElementAt_float3(gIn, x1, y3) * gCoeffs[16]
+ + rsGetElementAt_float3(gIn, x2, y3) * gCoeffs[17]
+ + rsGetElementAt_float3(gIn, x3, y3) * gCoeffs[18]
+ + rsGetElementAt_float3(gIn, x4, y3) * gCoeffs[19];
+
+ float3 p4 = rsGetElementAt_float3(gIn, x0, y4) * gCoeffs[20]
+ + rsGetElementAt_float3(gIn, x1, y4) * gCoeffs[21]
+ + rsGetElementAt_float3(gIn, x2, y4) * gCoeffs[22]
+ + rsGetElementAt_float3(gIn, x3, y4) * gCoeffs[23]
+ + rsGetElementAt_float3(gIn, x4, y4) * gCoeffs[24];
+
+ return p0 + p1 + p2 + p3 + p4;
+}
+
+float2 __attribute__((kernel)) convolve_F2(uint32_t x, uint32_t y) { /* 5x5 weighted sum of the float2 samples of gIn around (x, y); no clamp or rounding */
+ uint32_t x0 = max((int32_t)x-2, 0); /* x0/x1 and y0/y1: 2 and 1 steps back, floored at 0 */
+ uint32_t x1 = max((int32_t)x-1, 0);
+ uint32_t x2 = x;
+ uint32_t x3 = min((int32_t)x+1, gWidth-1); /* x3/x4 and y3/y4: 1 and 2 steps forward, capped at gWidth-1 / gHeight-1 */
+ uint32_t x4 = min((int32_t)x+2, gWidth-1);
+
+ uint32_t y0 = max((int32_t)y-2, 0);
+ uint32_t y1 = max((int32_t)y-1, 0);
+ uint32_t y2 = y;
+ uint32_t y3 = min((int32_t)y+1, gHeight-1);
+ uint32_t y4 = min((int32_t)y+2, gHeight-1);
+ /* pK accumulates the samples at y = yK: x0..x4 weighted by gCoeffs[5*K] .. gCoeffs[5*K + 4]. */
+ float2 p0 = rsGetElementAt_float2(gIn, x0, y0) * gCoeffs[0]
+ + rsGetElementAt_float2(gIn, x1, y0) * gCoeffs[1]
+ + rsGetElementAt_float2(gIn, x2, y0) * gCoeffs[2]
+ + rsGetElementAt_float2(gIn, x3, y0) * gCoeffs[3]
+ + rsGetElementAt_float2(gIn, x4, y0) * gCoeffs[4];
+
+ float2 p1 = rsGetElementAt_float2(gIn, x0, y1) * gCoeffs[5]
+ + rsGetElementAt_float2(gIn, x1, y1) * gCoeffs[6]
+ + rsGetElementAt_float2(gIn, x2, y1) * gCoeffs[7]
+ + rsGetElementAt_float2(gIn, x3, y1) * gCoeffs[8]
+ + rsGetElementAt_float2(gIn, x4, y1) * gCoeffs[9];
+
+ float2 p2 = rsGetElementAt_float2(gIn, x0, y2) * gCoeffs[10]
+ + rsGetElementAt_float2(gIn, x1, y2) * gCoeffs[11]
+ + rsGetElementAt_float2(gIn, x2, y2) * gCoeffs[12]
+ + rsGetElementAt_float2(gIn, x3, y2) * gCoeffs[13]
+ + rsGetElementAt_float2(gIn, x4, y2) * gCoeffs[14];
+
+ float2 p3 = rsGetElementAt_float2(gIn, x0, y3) * gCoeffs[15]
+ + rsGetElementAt_float2(gIn, x1, y3) * gCoeffs[16]
+ + rsGetElementAt_float2(gIn, x2, y3) * gCoeffs[17]
+ + rsGetElementAt_float2(gIn, x3, y3) * gCoeffs[18]
+ + rsGetElementAt_float2(gIn, x4, y3) * gCoeffs[19];
+
+ float2 p4 = rsGetElementAt_float2(gIn, x0, y4) * gCoeffs[20]
+ + rsGetElementAt_float2(gIn, x1, y4) * gCoeffs[21]
+ + rsGetElementAt_float2(gIn, x2, y4) * gCoeffs[22]
+ + rsGetElementAt_float2(gIn, x3, y4) * gCoeffs[23]
+ + rsGetElementAt_float2(gIn, x4, y4) * gCoeffs[24];
+
+ return p0 + p1 + p2 + p3 + p4;
+}
+
+float __attribute__((kernel)) convolve_F1(uint32_t x, uint32_t y) { /* 5x5 weighted sum of the float samples of gIn around (x, y); no clamp or rounding */
+ uint32_t x0 = max((int32_t)x-2, 0); /* x0/x1 and y0/y1: 2 and 1 steps back, floored at 0 */
+ uint32_t x1 = max((int32_t)x-1, 0);
+ uint32_t x2 = x;
+ uint32_t x3 = min((int32_t)x+1, gWidth-1); /* x3/x4 and y3/y4: 1 and 2 steps forward, capped at gWidth-1 / gHeight-1 */
+ uint32_t x4 = min((int32_t)x+2, gWidth-1);
+
+ uint32_t y0 = max((int32_t)y-2, 0);
+ uint32_t y1 = max((int32_t)y-1, 0);
+ uint32_t y2 = y;
+ uint32_t y3 = min((int32_t)y+1, gHeight-1);
+ uint32_t y4 = min((int32_t)y+2, gHeight-1);
+ /* pK accumulates the samples at y = yK: x0..x4 weighted by gCoeffs[5*K] .. gCoeffs[5*K + 4]. */
+ float p0 = rsGetElementAt_float(gIn, x0, y0) * gCoeffs[0]
+ + rsGetElementAt_float(gIn, x1, y0) * gCoeffs[1]
+ + rsGetElementAt_float(gIn, x2, y0) * gCoeffs[2]
+ + rsGetElementAt_float(gIn, x3, y0) * gCoeffs[3]
+ + rsGetElementAt_float(gIn, x4, y0) * gCoeffs[4];
+
+ float p1 = rsGetElementAt_float(gIn, x0, y1) * gCoeffs[5]
+ + rsGetElementAt_float(gIn, x1, y1) * gCoeffs[6]
+ + rsGetElementAt_float(gIn, x2, y1) * gCoeffs[7]
+ + rsGetElementAt_float(gIn, x3, y1) * gCoeffs[8]
+ + rsGetElementAt_float(gIn, x4, y1) * gCoeffs[9];
+
+ float p2 = rsGetElementAt_float(gIn, x0, y2) * gCoeffs[10]
+ + rsGetElementAt_float(gIn, x1, y2) * gCoeffs[11]
+ + rsGetElementAt_float(gIn, x2, y2) * gCoeffs[12]
+ + rsGetElementAt_float(gIn, x3, y2) * gCoeffs[13]
+ + rsGetElementAt_float(gIn, x4, y2) * gCoeffs[14];
+
+ float p3 = rsGetElementAt_float(gIn, x0, y3) * gCoeffs[15]
+ + rsGetElementAt_float(gIn, x1, y3) * gCoeffs[16]
+ + rsGetElementAt_float(gIn, x2, y3) * gCoeffs[17]
+ + rsGetElementAt_float(gIn, x3, y3) * gCoeffs[18]
+ + rsGetElementAt_float(gIn, x4, y3) * gCoeffs[19];
+
+ float p4 = rsGetElementAt_float(gIn, x0, y4) * gCoeffs[20]
+ + rsGetElementAt_float(gIn, x1, y4) * gCoeffs[21]
+ + rsGetElementAt_float(gIn, x2, y4) * gCoeffs[22]
+ + rsGetElementAt_float(gIn, x3, y4) * gCoeffs[23]
+ + rsGetElementAt_float(gIn, x4, y4) * gCoeffs[24];
+
+ return p0 + p1 + p2 + p3 + p4;
+}
+
+
+
diff --git a/tests/src/android/renderscript/cts/verify.rs b/tests/src/android/renderscript/cts/verify.rs
new file mode 100644
index 0000000..d100eb4
--- /dev/null
+++ b/tests/src/android/renderscript/cts/verify.rs
@@ -0,0 +1,257 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "shared.rsh"
+
+rs_allocation gIn1; // NOTE(review): not referenced by any function in this script
+rs_allocation gIn2; // NOTE(review): not referenced by any function in this script
+float gAllowedError; // NOTE(review): never read here; compare_float() uses a fixed 0.0001f
+
+static bool hadError = false; // set to true by compare_float() on a mismatch; read by checkError()
+
+// Reports whether f1 and f2 agree to within 0.0001 (a NaN difference also
+// passes). A larger gap latches the script-wide hadError flag as a side effect.
+static bool compare_float(float f1, float f2) {
+    const bool withinTolerance = !(fabs(f1-f2) > 0.0001f);
+    hadError = hadError || !withinTolerance;
+    return withinTolerance;
+}
+
+// Compares two float4 allocations texel by texel, using the X/Y dimensions of
+// in1 for both. Every lane goes through compare_float(); at the first texel
+// with a mismatching lane the coordinates and both values are logged and the
+// scan stops.
+static void verify_float4(rs_allocation in1, rs_allocation in2) {
+    const uint32_t width = rsAllocationGetDimX(in1);
+    const uint32_t height = rsAllocationGetDimY(in1);
+    for (uint32_t y = 0; y < height; y++) {
+        for (uint32_t x = 0; x < width; x++) {
+            float4 p1 = rsGetElementAt_float4(in1, x, y);
+            float4 p2 = rsGetElementAt_float4(in2, x, y);
+
+            // No short-circuit: all four lanes are always compared.
+            bool match = compare_float(p1.x, p2.x);
+            match &= compare_float(p1.y, p2.y);
+            match &= compare_float(p1.z, p2.z);
+            match &= compare_float(p1.w, p2.w);
+            if (match) {
+                continue;
+            }
+
+            rsDebug("verify_float4 x", x);
+            rsDebug("verify_float4 y", y);
+            rsDebug("verify_float4 p1", p1);
+            rsDebug("verify_float4 p2", p2);
+            return;
+        }
+    }
+}
+
+// Compares two float3 allocations texel by texel, using the X/Y dimensions of
+// in1 for both. Each of the three lanes goes through compare_float(); the
+// first texel with a mismatching lane is logged and ends the scan.
+static void verify_float3(rs_allocation in1, rs_allocation in2)
+{
+    uint32_t w = rsAllocationGetDimX(in1);
+    uint32_t h = rsAllocationGetDimY(in1);
+    for (uint32_t y=0; y < h; y++) {
+        for (uint32_t x=0; x < w; x++) {
+            float3 p1 = rsGetElementAt_float3(in1, x, y);
+            float3 p2 = rsGetElementAt_float3(in2, x, y);
+            // Non-short-circuit on purpose: every lane is always compared.
+            bool e = !compare_float(p1.x, p2.x);
+            e |= !compare_float(p1.y, p2.y);
+            e |= !compare_float(p1.z, p2.z);
+            if (e) {
+                // Labels name this function so the log identifies the element type.
+                rsDebug("verify_float3 x", x);
+                rsDebug("verify_float3 y", y);
+                rsDebug("verify_float3 p1", p1);
+                rsDebug("verify_float3 p2", p2);
+                return;
+            }
+        }
+    }
+}
+
+// Compares two float2 allocations texel by texel, using the X/Y dimensions of
+// in1 for both. Both lanes go through compare_float(); the first texel with a
+// mismatching lane is logged and ends the scan.
+static void verify_float2(rs_allocation in1, rs_allocation in2)
+{
+    uint32_t w = rsAllocationGetDimX(in1);
+    uint32_t h = rsAllocationGetDimY(in1);
+    for (uint32_t y=0; y < h; y++) {
+        for (uint32_t x=0; x < w; x++) {
+            float2 p1 = rsGetElementAt_float2(in1, x, y);
+            float2 p2 = rsGetElementAt_float2(in2, x, y);
+            // Non-short-circuit on purpose: both lanes are always compared.
+            bool e = !compare_float(p1.x, p2.x);
+            e |= !compare_float(p1.y, p2.y);
+            if (e) {
+                // Labels name this function so the log identifies the element type.
+                rsDebug("verify_float2 x", x);
+                rsDebug("verify_float2 y", y);
+                rsDebug("verify_float2 p1", p1);
+                rsDebug("verify_float2 p2", p2);
+                return;
+            }
+        }
+    }
+}
+
+// Compares two single-channel float allocations texel by texel, using the X/Y
+// dimensions of in1 for both. Values are checked with compare_float(); the
+// first mismatching texel is logged and ends the scan.
+static void verify_float(rs_allocation in1, rs_allocation in2)
+{
+    uint32_t w = rsAllocationGetDimX(in1);
+    uint32_t h = rsAllocationGetDimY(in1);
+    for (uint32_t y=0; y < h; y++) {
+        for (uint32_t x=0; x < w; x++) {
+            float p1 = rsGetElementAt_float(in1, x, y);
+            float p2 = rsGetElementAt_float(in2, x, y);
+            bool e = !compare_float(p1, p2);
+            if (e) {
+                // Labels name this function so the log identifies the element type.
+                rsDebug("verify_float x", x);
+                rsDebug("verify_float y", y);
+                rsDebug("verify_float p1", p1);
+                rsDebug("verify_float p2", p2);
+                return;
+            }
+        }
+    }
+}
+
+// Compares two uchar4 allocations texel by texel (dimensions taken from in1)
+// and requires an exact match in every channel. The first differing texel is
+// logged, recorded in hadError so that checkError() reports a failure, and
+// ends the scan.
+static void verify_uchar4(rs_allocation in1, rs_allocation in2)
+{
+    uint32_t w = rsAllocationGetDimX(in1);
+    uint32_t h = rsAllocationGetDimY(in1);
+    for (uint32_t y=0; y < h; y++) {
+        for (uint32_t x=0; x < w; x++) {
+            // Widen to int so the per-channel difference cannot wrap.
+            int4 p1 = convert_int4(rsGetElementAt_uchar4(in1, x, y));
+            int4 p2 = convert_int4(rsGetElementAt_uchar4(in2, x, y));
+            int4 d = convert_int4(abs(p1 - p2));
+            // Largest absolute per-channel difference for this texel.
+            int e = 0;
+            e = max(e, d.x);
+            e = max(e, d.y);
+            e = max(e, d.z);
+            e = max(e, d.w);
+            if (e != 0) {
+                hadError = true;
+                rsDebug("verify_uchar4 x", x);
+                rsDebug("verify_uchar4 y", y);
+                rsDebug("verify_uchar4 p1", p1);
+                rsDebug("verify_uchar4 p2", p2);
+                return;
+            }
+        }
+    }
+}
+
+// Compares two uchar3 allocations texel by texel (dimensions taken from in1)
+// and requires an exact match in every channel. The first differing texel is
+// logged, recorded in hadError so that checkError() reports a failure, and
+// ends the scan.
+static void verify_uchar3(rs_allocation in1, rs_allocation in2)
+{
+    uint32_t w = rsAllocationGetDimX(in1);
+    uint32_t h = rsAllocationGetDimY(in1);
+    for (uint32_t y=0; y < h; y++) {
+        for (uint32_t x=0; x < w; x++) {
+            // Widen to int so the per-channel difference cannot wrap.
+            int3 p1 = convert_int3(rsGetElementAt_uchar3(in1, x, y));
+            int3 p2 = convert_int3(rsGetElementAt_uchar3(in2, x, y));
+            int3 d = convert_int3(abs(p1 - p2));
+            // Largest absolute per-channel difference for this texel.
+            int e = 0;
+            e = max(e, d.x);
+            e = max(e, d.y);
+            e = max(e, d.z);
+            if (e != 0) {
+                hadError = true;
+                rsDebug("verify_uchar3 x", x);
+                rsDebug("verify_uchar3 y", y);
+                rsDebug("verify_uchar3 p1", p1);
+                rsDebug("verify_uchar3 p2", p2);
+                return;
+            }
+        }
+    }
+}
+
+// Compares two uchar2 allocations texel by texel (dimensions taken from in1)
+// and requires an exact match in both channels. The first differing texel is
+// logged, recorded in hadError so that checkError() reports a failure, and
+// ends the scan.
+static void verify_uchar2(rs_allocation in1, rs_allocation in2)
+{
+    uint32_t w = rsAllocationGetDimX(in1);
+    uint32_t h = rsAllocationGetDimY(in1);
+    for (uint32_t y=0; y < h; y++) {
+        for (uint32_t x=0; x < w; x++) {
+            // Widen to int so the per-channel difference cannot wrap.
+            int2 p1 = convert_int2(rsGetElementAt_uchar2(in1, x, y));
+            int2 p2 = convert_int2(rsGetElementAt_uchar2(in2, x, y));
+            int2 d = convert_int2(abs(p1 - p2));
+            // Largest absolute per-channel difference for this texel.
+            int e = 0;
+            e = max(e, d.x);
+            e = max(e, d.y);
+            if (e != 0) {
+                hadError = true;
+                rsDebug("verify_uchar2 x", x);
+                rsDebug("verify_uchar2 y", y);
+                rsDebug("verify_uchar2 p1", p1);
+                rsDebug("verify_uchar2 p2", p2);
+                return;
+            }
+        }
+    }
+}
+
+// Compares two single-channel uchar allocations texel by texel (dimensions
+// taken from in1) and requires an exact match. The first differing texel is
+// logged, recorded in hadError so that checkError() reports a failure, and
+// ends the scan.
+static void verify_uchar(rs_allocation in1, rs_allocation in2)
+{
+    uint32_t w = rsAllocationGetDimX(in1);
+    uint32_t h = rsAllocationGetDimY(in1);
+    for (uint32_t y=0; y < h; y++) {
+        for (uint32_t x=0; x < w; x++) {
+            // Held as int so the difference cannot wrap.
+            int p1 = rsGetElementAt_uchar(in1, x, y);
+            int p2 = rsGetElementAt_uchar(in2, x, y);
+            int e = abs(p1 - p2);
+            if (e != 0) {
+                hadError = true;
+                // Labels name this function so the log identifies the element type.
+                rsDebug("verify_uchar x", x);
+                rsDebug("verify_uchar y", y);
+                rsDebug("verify_uchar p1", p1);
+                rsDebug("verify_uchar p2", p2);
+                return;
+            }
+        }
+    }
+}
+
+// Invokable entry point: compares in1 against in2 with the checker that
+// matches the allocations' element type.
+//
+//   etype 0 -> uchar4    etype 4 -> float4
+//   etype 1 -> uchar3    etype 5 -> float3
+//   etype 2 -> uchar2    etype 6 -> float2
+//   etype 3 -> uchar     etype 7 -> float
+//
+// Any other etype is logged and recorded in hadError.
+void verify(rs_allocation in1, rs_allocation in2, int etype)
+{
+    switch(etype) {
+    case 0:
+        verify_uchar4(in1, in2);
+        break;
+    case 1:
+        verify_uchar3(in1, in2);
+        break;
+    case 2:
+        verify_uchar2(in1, in2);
+        break;
+    case 3:
+        verify_uchar(in1, in2);
+        break;
+    case 4:
+        verify_float4(in1, in2);
+        break;
+    case 5:
+        verify_float3(in1, in2);
+        break;
+    case 6:
+        verify_float2(in1, in2);
+        break;
+    case 7:
+        verify_float(in1, in2);
+        break;
+    default:
+        // An unrecognised selector would otherwise compare nothing and let
+        // the test pass vacuously, so treat it as a failure.
+        rsDebug("verify unknown etype", etype);
+        hadError = true;
+        break;
+    }
+}
+
+// Reports the accumulated verdict to the client: RS_MSG_TEST_FAILED when
+// hadError has been latched, RS_MSG_TEST_PASSED otherwise. The flag itself
+// is left untouched.
+void checkError()
+{
+    rsSendToClientBlocking(hadError
+                           ? RS_MSG_TEST_FAILED
+                           : RS_MSG_TEST_PASSED);
+}
diff --git a/tests/tests/renderscript/src/android/renderscript/cts/IntrinsicBase.java b/tests/tests/renderscript/src/android/renderscript/cts/IntrinsicBase.java
new file mode 100644
index 0000000..f5a6eee
--- /dev/null
+++ b/tests/tests/renderscript/src/android/renderscript/cts/IntrinsicBase.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.renderscript.cts;
+
+import android.util.Log;
+import android.renderscript.RenderScript;
+import android.renderscript.Allocation;
+import android.renderscript.Element;
+import android.renderscript.Type;
+
+/**
+ * Shared fixture for the intrinsic tests: owns the source, reference and
+ * destination allocations plus the verify script used to compare results.
+ */
+public class IntrinsicBase extends RSBaseCompute {
+    protected final String TAG = "Img";
+
+    protected Allocation mAllocSrc;
+    protected Allocation mAllocRef;
+    protected Allocation mAllocDst;
+    protected ScriptC_verify mVerify;
+
+    @Override
+    protected void setUp() throws Exception {
+        super.setUp();
+        mVerify = new ScriptC_verify(mRS);
+    }
+
+    /** Releases the verify script and any allocations left from the last makeSource(). */
+    @Override
+    protected void tearDown() throws Exception {
+        if (mVerify != null) {
+            mVerify.destroy();
+            mVerify = null;
+        }
+        destroyAllocations();
+        super.tearDown();
+    }
+
+    /** Destroys whichever of the three allocations exist and clears the fields. */
+    private void destroyAllocations() {
+        for (Allocation a : new Allocation[] {mAllocSrc, mAllocRef, mAllocDst}) {
+            if (a != null) {
+                a.destroy();
+            }
+        }
+        mAllocSrc = mAllocRef = mAllocDst = null;
+    }
+
+    /**
+     * (Re)creates the three w x h allocations of element e and fills the source from a
+     * fixed-seed generator; only FLOAT_32 and UNSIGNED_8 sources are filled.
+     */
+    protected void makeSource(int w, int h, Element e) {
+        System.gc();
+        destroyAllocations();
+        Type.Builder tb = new Type.Builder(mRS, e);
+        tb.setX(w);
+        tb.setY(h);
+        Type t = tb.create();
+        mAllocSrc = Allocation.createTyped(mRS, t);
+        mAllocRef = Allocation.createTyped(mRS, t);
+        mAllocDst = Allocation.createTyped(mRS, t);
+
+        java.util.Random r = new java.util.Random(100);
+        // One value per vector lane of every pixel, written in row-major order.
+        int count = w * h * e.getVectorSize();
+        if (e.getDataType() == Element.DataType.FLOAT_32) {
+            float f[] = new float[count];
+            for (int i = 0; i < count; i++) {
+                f[i] = r.nextFloat();
+            }
+            mAllocSrc.copyFromUnchecked(f);
+        }
+
+        if (e.getDataType() == Element.DataType.UNSIGNED_8) {
+            byte f[] = new byte[count];
+            for (int i = 0; i < count; i++) {
+                f[i] = (byte)r.nextInt(256);
+            }
+            mAllocSrc.copyFromUnchecked(f);
+        }
+    }
+
+    /** Calls mRS.finish(), invokes the verify script's checkError, then waits and checks. */
+    protected void checkError() {
+        mRS.finish();
+        mVerify.invoke_checkError();
+        waitForMessage();
+        checkForErrors();
+    }
+}
diff --git a/tests/tests/renderscript/src/android/renderscript/cts/IntrinsicConvolve3x3.java b/tests/tests/renderscript/src/android/renderscript/cts/IntrinsicConvolve3x3.java
new file mode 100644
index 0000000..e74536b
--- /dev/null
+++ b/tests/tests/renderscript/src/android/renderscript/cts/IntrinsicConvolve3x3.java
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.renderscript.cts;
+
+import android.renderscript.*;
+import android.util.Log;
+
+public class IntrinsicConvolve3x3 extends IntrinsicBase {
+ private void testConvolve3(int w, int h, Element.DataType dt, int vecSize, int en) {
+ float cf1[] = {0.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 0.f};
+ float cf2[] = {0.f, -1.f, 0.f, -1.f, 5.f, -1.f, 0.f, -1.f, 0.f};
+
+
+ Element e;
+ if (vecSize > 1) {
+ e = Element.createVector(mRS, dt, vecSize);
+ } else {
+ if (dt == Element.DataType.UNSIGNED_8) {
+ e = Element.U8(mRS);
+ } else {
+ e = Element.F32(mRS);
+ }
+ }
+
+ System.gc();
+ makeSource(w, h, e);
+
+
+ ScriptIntrinsicConvolve3x3 si = ScriptIntrinsicConvolve3x3.create(mRS, e);
+ si.setCoefficients(cf1);
+ si.setInput(mAllocSrc);
+ si.forEach(mAllocRef);
+
+ ScriptC_intrinsic_convolve3x3 sr = new ScriptC_intrinsic_convolve3x3(mRS);
+ sr.set_gCoeffs(cf1);
+ sr.set_gIn(mAllocSrc);
+ sr.set_gWidth(w);
+ sr.set_gHeight(h);
+ if (dt == Element.DataType.UNSIGNED_8) {
+ switch(vecSize) {
+ case 4:
+ sr.forEach_convolve_U4(mAllocDst);
+ break;
+ case 3:
+ sr.forEach_convolve_U3(mAllocDst);
+ break;
+ case 2:
+ sr.forEach_convolve_U2(mAllocDst);
+ break;
+ case 1:
+ sr.forEach_convolve_U1(mAllocDst);
+ break;
+ }
+ } else {
+ switch(vecSize) {
+ case 4:
+ sr.forEach_convolve_F4(mAllocDst);
+ break;
+ case 3:
+ sr.forEach_convolve_F3(mAllocDst);
+ break;
+ case 2:
+ sr.forEach_convolve_F2(mAllocDst);
+ break;
+ case 1:
+ sr.forEach_convolve_F1(mAllocDst);
+ break;
+ }
+ }
+
+ android.util.Log.e("RSI test", "test convolve U8_" + vecSize + " 1 " + w + ", " + h);
+ mVerify.invoke_verify(mAllocRef, mAllocDst, en);
+
+ si.setCoefficients(cf2);
+ sr.set_gCoeffs(cf2);
+ si.forEach(mAllocRef);
+ if (dt == Element.DataType.UNSIGNED_8) {
+ switch(vecSize) {
+ case 4:
+ sr.forEach_convolve_U4(mAllocDst);
+ break;
+ case 3:
+ sr.forEach_convolve_U3(mAllocDst);
+ break;
+ case 2:
+ sr.forEach_convolve_U2(mAllocDst);
+ break;
+ case 1:
+ sr.forEach_convolve_U1(mAllocDst);
+ break;
+ }
+ } else {
+ switch(vecSize) {
+ case 4:
+ sr.forEach_convolve_F4(mAllocDst);
+ break;
+ case 3:
+ sr.forEach_convolve_F3(mAllocDst);
+ break;
+ case 2:
+ sr.forEach_convolve_F2(mAllocDst);
+ break;
+ case 1:
+ sr.forEach_convolve_F1(mAllocDst);
+ break;
+ }
+ }
+ android.util.Log.e("RSI test", "test convolve U8_" + vecSize + " 2 " + w + ", " + h);
+ mVerify.invoke_verify(mAllocRef, mAllocDst, en);
+ mRS.finish();
+ }
+
+
+ public void test() {
+ testConvolve3(100, 100, Element.DataType.UNSIGNED_8, 4, 0);
+ testConvolve3(100, 100, Element.DataType.UNSIGNED_8, 3, 1);
+ testConvolve3(100, 100, Element.DataType.UNSIGNED_8, 2, 2);
+ testConvolve3(100, 100, Element.DataType.UNSIGNED_8, 1, 3);
+
+ testConvolve3(100, 100, Element.DataType.FLOAT_32, 4, 4);
+ testConvolve3(100, 100, Element.DataType.FLOAT_32, 3, 5);
+ testConvolve3(100, 100, Element.DataType.FLOAT_32, 2, 6);
+ testConvolve3(100, 100, Element.DataType.FLOAT_32, 1, 7);
+ checkError();
+ }
+
+
+}
diff --git a/tests/tests/renderscript/src/android/renderscript/cts/IntrinsicConvolve5x5.java b/tests/tests/renderscript/src/android/renderscript/cts/IntrinsicConvolve5x5.java
new file mode 100644
index 0000000..500b5aa
--- /dev/null
+++ b/tests/tests/renderscript/src/android/renderscript/cts/IntrinsicConvolve5x5.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.renderscript.cts;
+
+import android.renderscript.*;
+import android.util.Log;
+
+/**
+ * Runs ScriptIntrinsicConvolve5x5 and the script reference implementation on the
+ * same source data and hands both results to the verify script.
+ */
+public class IntrinsicConvolve5x5 extends IntrinsicBase {
+    /**
+     * Launches the reference kernel matching the element's data type and vector
+     * size into mAllocDst. Vector sizes outside 1..4 launch nothing.
+     */
+    private void launchReference(ScriptC_intrinsic_convolve5x5 reference, boolean isU8, int lanes) {
+        if (isU8) {
+            if (lanes == 4) {
+                reference.forEach_convolve_U4(mAllocDst);
+            } else if (lanes == 3) {
+                reference.forEach_convolve_U3(mAllocDst);
+            } else if (lanes == 2) {
+                reference.forEach_convolve_U2(mAllocDst);
+            } else if (lanes == 1) {
+                reference.forEach_convolve_U1(mAllocDst);
+            }
+        } else {
+            if (lanes == 4) {
+                reference.forEach_convolve_F4(mAllocDst);
+            } else if (lanes == 3) {
+                reference.forEach_convolve_F3(mAllocDst);
+            } else if (lanes == 2) {
+                reference.forEach_convolve_F2(mAllocDst);
+            } else if (lanes == 1) {
+                reference.forEach_convolve_F1(mAllocDst);
+            }
+        }
+    }
+
+    /**
+     * Executes one coefficient set through the intrinsic (into mAllocRef) and the
+     * reference script (into mAllocDst), queues the comparison and calls mRS.finish().
+     */
+    private void runPass(ScriptC_intrinsic_convolve5x5 reference,
+            ScriptIntrinsicConvolve5x5 intrinsic, Element elem, float coeffs[],
+            String label, int pass, int width, int height, int etype) {
+        intrinsic.setCoefficients(coeffs);
+        intrinsic.setInput(mAllocSrc);
+        intrinsic.forEach(mAllocRef);
+
+        reference.set_gWidth(width);
+        reference.set_gHeight(height);
+        reference.set_gCoeffs(coeffs);
+        reference.set_gIn(mAllocSrc);
+
+        final int lanes = elem.getVectorSize();
+        final boolean isU8 = elem.getDataType() == Element.DataType.UNSIGNED_8;
+        launchReference(reference, isU8, lanes);
+
+        android.util.Log.e("RSI test", label + " " + lanes + " " + pass + " " + width + ", " + height);
+        mVerify.invoke_verify(mAllocRef, mAllocDst, etype);
+        mRS.finish();
+    }
+
+    /**
+     * Builds a w x h source of the requested element type and checks the
+     * intrinsic against the reference for two coefficient sets.
+     */
+    private void testConvolve5(int w, int h, Element.DataType dt, int vecSize, int en) {
+        // Only the centre tap is set: the output should reproduce the input.
+        final float identity[] = { 0.f,  0.f,  0.f,  0.f,  0.f,
+                                   0.f,  0.f,  0.f,  0.f,  0.f,
+                                   0.f,  0.f,  1.f,  0.f,  0.f,
+                                   0.f,  0.f,  0.f,  0.f,  0.f,
+                                   0.f,  0.f,  0.f,  0.f,  0.f};
+        // Border taps -1, centre tap 16, remaining inner taps 0.
+        final float ring[] = {-1.f, -1.f, -1.f, -1.f, -1.f,
+                              -1.f,  0.f,  0.f,  0.f, -1.f,
+                              -1.f,  0.f, 16.f,  0.f, -1.f,
+                              -1.f,  0.f,  0.f,  0.f, -1.f,
+                              -1.f, -1.f, -1.f, -1.f, -1.f};
+
+        final Element elem;
+        if (vecSize > 1) {
+            elem = Element.createVector(mRS, dt, vecSize);
+        } else if (dt == Element.DataType.UNSIGNED_8) {
+            elem = Element.U8(mRS);
+        } else {
+            elem = Element.F32(mRS);
+        }
+
+        makeSource(w, h, elem);
+
+        ScriptIntrinsicConvolve5x5 intrinsic = ScriptIntrinsicConvolve5x5.create(mRS, elem);
+        ScriptC_intrinsic_convolve5x5 reference = new ScriptC_intrinsic_convolve5x5(mRS);
+        runPass(reference, intrinsic, elem, identity, "test convolve", 1, w, h, en);
+        runPass(reference, intrinsic, elem, ring, "test convolve", 2, w, h, en);
+    }
+
+
+    /** Exercises every element type on a 100x100 image, then collects the verdict. */
+    public void test() {
+        final Element.DataType[] types = {
+            Element.DataType.UNSIGNED_8, Element.DataType.FLOAT_32
+        };
+        // Selector for the verify script: 0..3 for U8 sizes 4..1, 4..7 for F32 sizes 4..1.
+        int selector = 0;
+        for (Element.DataType type : types) {
+            for (int vecSize = 4; vecSize >= 1; vecSize--) {
+                testConvolve5(100, 100, type, vecSize, selector);
+                selector++;
+            }
+        }
+        checkError();
+    }
+
+
+}