Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2020 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #include "include/utils/SkRandom.h" |
| 9 | #include "src/core/SkGeometry.h" |
| 10 | #include "src/gpu/GrVx.h" |
| 11 | #include "tests/Test.h" |
| 12 | #include <limits> |
Chris Dalton | d461f6e | 2020-11-23 13:51:06 -0700 | [diff] [blame] | 13 | #include <numeric> |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 14 | |
| 15 | using namespace grvx; |
| 16 | using skvx::bit_pun; |
| 17 | |
| 18 | DEF_TEST(grvx_cross_dot, r) { |
| 19 | REPORTER_ASSERT(r, grvx::cross({0,1}, {0,1}) == 0); |
| 20 | REPORTER_ASSERT(r, grvx::cross({1,0}, {1,0}) == 0); |
| 21 | REPORTER_ASSERT(r, grvx::cross({1,1}, {1,1}) == 0); |
| 22 | REPORTER_ASSERT(r, grvx::cross({1,1}, {1,-1}) == -2); |
| 23 | REPORTER_ASSERT(r, grvx::cross({1,1}, {-1,1}) == 2); |
| 24 | |
| 25 | REPORTER_ASSERT(r, grvx::dot({0,1}, {1,0}) == 0); |
| 26 | REPORTER_ASSERT(r, grvx::dot({1,0}, {0,1}) == 0); |
| 27 | REPORTER_ASSERT(r, grvx::dot({1,1}, {1,-1}) == 0); |
| 28 | REPORTER_ASSERT(r, grvx::dot({1,1}, {1,1}) == 2); |
| 29 | REPORTER_ASSERT(r, grvx::dot({1,1}, {-1,-1}) == -2); |
| 30 | |
| 31 | SkRandom rand; |
| 32 | for (int i = 0; i < 100; ++i) { |
| 33 | float a=rand.nextRangeF(-1,1), b=rand.nextRangeF(-1,1), c=rand.nextRangeF(-1,1), |
| 34 | d=rand.nextRangeF(-1,1); |
| 35 | constexpr static float kTolerance = 1.f / (1 << 20); |
| 36 | REPORTER_ASSERT(r, SkScalarNearlyEqual( |
| 37 | grvx::cross({a,b}, {c,d}), SkPoint::CrossProduct({a,b}, {c,d}), kTolerance)); |
| 38 | REPORTER_ASSERT(r, SkScalarNearlyEqual( |
| 39 | grvx::dot({a,b}, {c,d}), SkPoint::DotProduct({a,b}, {c,d}), kTolerance)); |
| 40 | } |
| 41 | } |
| 42 | |
| 43 | static bool check_approx_acos(skiatest::Reporter* r, float x, float approx_acos_x) { |
| 44 | float acosf_x = acosf(x); |
| 45 | float error = acosf_x - approx_acos_x; |
Chris Dalton | bb33be2 | 2021-02-24 16:30:34 -0700 | [diff] [blame] | 46 | if (!(fabsf(error) <= GRVX_APPROX_ACOS_MAX_ERROR)) { |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 47 | ERRORF(r, "Larger-than-expected error from grvx::approx_acos\n" |
| 48 | " x= %f\n" |
| 49 | " approx_acos_x= %f (%f degrees\n" |
| 50 | " acosf_x= %f (%f degrees\n" |
| 51 | " error= %f (%f degrees)\n" |
| 52 | " tolerance= %f (%f degrees)\n\n", |
| 53 | x, approx_acos_x, SkRadiansToDegrees(approx_acos_x), acosf_x, |
| 54 | SkRadiansToDegrees(acosf_x), error, SkRadiansToDegrees(error), |
Chris Dalton | bb33be2 | 2021-02-24 16:30:34 -0700 | [diff] [blame] | 55 | GRVX_APPROX_ACOS_MAX_ERROR, SkRadiansToDegrees(GRVX_APPROX_ACOS_MAX_ERROR)); |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 56 | return false; |
| 57 | } |
| 58 | return true; |
| 59 | } |
| 60 | |
| 61 | DEF_TEST(grvx_approx_acos, r) { |
| 62 | float4 boundaries = approx_acos(float4{-1, 0, 1, 0}); |
| 63 | check_approx_acos(r, -1, boundaries[0]); |
| 64 | check_approx_acos(r, 0, boundaries[1]); |
| 65 | check_approx_acos(r, +1, boundaries[2]); |
| 66 | |
| 67 | // Select a distribution of starting points around which to begin testing approx_acos. These |
| 68 | // fall roughly around the known minimum and maximum errors. No need to include -1, 0, or 1 |
| 69 | // since those were just tested above. (Those are tricky because 0 is an inflection and the |
| 70 | // derivative is infinite at 1 and -1.) |
| 71 | constexpr static int N = 8; |
| 72 | vec<8> x = {-.99f, -.8f, -.4f, -.2f, .2f, .4f, .8f, .99f}; |
| 73 | |
| 74 | // Converge at the various local minima and maxima of "approx_acos(x) - cosf(x)" and verify that |
| 75 | // approx_acos is always within "kTolerance" degrees of the expected answer. |
| 76 | vec<N> err_; |
| 77 | for (int iter = 0; iter < 10; ++iter) { |
| 78 | // Run our approximate inverse cosine approximation. |
| 79 | vec<N> approx_acos_x = approx_acos(x); |
| 80 | |
| 81 | // Find d/dx(error) |
| 82 | // = d/dx(approx_acos(x) - acos(x)) |
| 83 | // = (f'g - fg')/gg + 1/sqrt(1 - x^2), [where f = bx^3 + ax, g = dx^4 + cx^2 + 1] |
| 84 | vec<N> xx = x*x; |
| 85 | vec<N> a = -0.939115566365855f; |
| 86 | vec<N> b = 0.9217841528914573f; |
| 87 | vec<N> c = -1.2845906244690837f; |
| 88 | vec<N> d = 0.295624144969963174f; |
| 89 | vec<N> f = (b*xx + a)*x; |
| 90 | vec<N> f_ = 3*b*xx + a; |
| 91 | vec<N> g = (d*xx + c)*xx + 1; |
| 92 | vec<N> g_ = (4*d*xx + 2*c)*x; |
| 93 | vec<N> gg = g*g; |
| 94 | vec<N> q = skvx::sqrt(1 - xx); |
| 95 | err_ = (f_*g - f*g_)/gg + 1/q; |
| 96 | |
| 97 | // Find d^2/dx^2(error) |
| 98 | // = ((f''g - fg'')g^2 - (f'g - fg')2gg') / g^4 + x(1 - x^2)^(-3/2) |
| 99 | // = ((f''g - fg'')g - (f'g - fg')2g') / g^3 + x(1 - x^2)^(-3/2) |
| 100 | vec<N> f__ = 6*b*x; |
| 101 | vec<N> g__ = 12*d*xx + 2*c; |
| 102 | vec<N> err__ = ((f__*g - f*g__)*g - (f_*g - f*g_)*2*g_) / (gg*g) + x/((1 - xx)*q); |
| 103 | |
| 104 | #if 0 |
| 105 | SkDebugf("\n\niter %i\n", iter); |
| 106 | #endif |
| 107 | // Ensure each lane's approximation is within maximum error. |
| 108 | for (int j = 0; j < N; ++j) { |
| 109 | #if 0 |
| 110 | SkDebugf("x=%f err=%f err'=%f err''=%f\n", |
| 111 | x[j], SkRadiansToDegrees(approx_acos_x[j] - acosf(x[j])), |
| 112 | SkRadiansToDegrees(err_[j]), SkRadiansToDegrees(err__[j])); |
| 113 | #endif |
| 114 | if (!check_approx_acos(r, x[j], approx_acos_x[j])) { |
| 115 | return; |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | // Use Newton's method to update the x values to locations closer to their local minimum or |
| 120 | // maximum. (This is where d/dx(error) == 0.) |
| 121 | x -= err_/err__; |
| 122 | x = skvx::pin(x, vec<N>(-.99f), vec<N>(.99f)); |
| 123 | } |
| 124 | |
| 125 | // Ensure each lane converged to a local minimum or maximum. |
| 126 | for (int j = 0; j < N; ++j) { |
| 127 | REPORTER_ASSERT(r, SkScalarNearlyZero(err_[j])); |
| 128 | } |
| 129 | |
| 130 | // Make sure we found all the actual known locations of local min/max error. |
| 131 | for (float knownRoot : {-0.983536f, -0.867381f, -0.410923f, 0.410923f, 0.867381f, 0.983536f}) { |
| 132 | REPORTER_ASSERT(r, skvx::any(skvx::abs(x - knownRoot) < SK_ScalarNearlyZero)); |
| 133 | } |
| 134 | } |
| 135 | |
Chris Dalton | 6bacd9f | 2020-11-02 16:12:12 -0700 | [diff] [blame] | 136 | static float precise_angle_between_vectors(SkPoint a, SkPoint b) { |
| 137 | if (a.isZero() || b.isZero()) { |
| 138 | return 0; |
| 139 | } |
| 140 | double ax=a.fX, ay=a.fY, bx=b.fX, by=b.fY; |
| 141 | double theta = (ax*bx + ay*by) / sqrt(ax*ax + ay*ay) / sqrt(bx*bx + by*by); |
| 142 | return (float)acos(theta); |
| 143 | } |
| 144 | |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 145 | static bool check_approx_angle_between_vectors(skiatest::Reporter* r, SkVector a, SkVector b, |
| 146 | float approxTheta) { |
Chris Dalton | 6bacd9f | 2020-11-02 16:12:12 -0700 | [diff] [blame] | 147 | float expectedTheta = precise_angle_between_vectors(a, b); |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 148 | float error = expectedTheta - approxTheta; |
Chris Dalton | bb33be2 | 2021-02-24 16:30:34 -0700 | [diff] [blame] | 149 | if (!(fabsf(error) <= GRVX_APPROX_ACOS_MAX_ERROR + SK_ScalarNearlyZero)) { |
Chris Dalton | 6bacd9f | 2020-11-02 16:12:12 -0700 | [diff] [blame] | 150 | int expAx = SkFloat2Bits(a.fX) >> 23 & 0xff; |
| 151 | int expAy = SkFloat2Bits(a.fY) >> 23 & 0xff; |
| 152 | int expBx = SkFloat2Bits(b.fX) >> 23 & 0xff; |
| 153 | int expBy = SkFloat2Bits(b.fY) >> 23 & 0xff; |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 154 | ERRORF(r, "Larger-than-expected error from grvx::approx_angle_between_vectors\n" |
| 155 | " a= {%f, %f}\n" |
| 156 | " b= {%f, %f}\n" |
Chris Dalton | 6bacd9f | 2020-11-02 16:12:12 -0700 | [diff] [blame] | 157 | " expA= {%u, %u}\n" |
| 158 | " expB= {%u, %u}\n" |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 159 | " approxTheta= %f (%f degrees\n" |
| 160 | " expectedTheta= %f (%f degrees)\n" |
| 161 | " error= %f (%f degrees)\n" |
| 162 | " tolerance= %f (%f degrees)\n\n", |
Chris Dalton | 6bacd9f | 2020-11-02 16:12:12 -0700 | [diff] [blame] | 163 | a.fX, a.fY, b.fX, b.fY, expAx, expAy, expBx, expBy, approxTheta, |
| 164 | SkRadiansToDegrees(approxTheta), expectedTheta, SkRadiansToDegrees(expectedTheta), |
Chris Dalton | bb33be2 | 2021-02-24 16:30:34 -0700 | [diff] [blame] | 165 | error, SkRadiansToDegrees(error), GRVX_APPROX_ACOS_MAX_ERROR, |
| 166 | SkRadiansToDegrees(GRVX_APPROX_ACOS_MAX_ERROR)); |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 167 | return false; |
| 168 | } |
| 169 | return true; |
| 170 | } |
| 171 | |
| 172 | static bool check_approx_angle_between_vectors(skiatest::Reporter* r, SkVector a, SkVector b) { |
Chris Dalton | 356cef3 | 2020-12-01 02:11:24 -0700 | [diff] [blame] | 173 | float approxTheta = grvx::approx_angle_between_vectors(bit_pun<float2>(a), |
| 174 | bit_pun<float2>(b)).val; |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 175 | return check_approx_angle_between_vectors(r, a, b, approxTheta); |
| 176 | } |
| 177 | |
| 178 | DEF_TEST(grvx_approx_angle_between_vectors, r) { |
| 179 | // Test when a and/or b are zero. |
Chris Dalton | 356cef3 | 2020-12-01 02:11:24 -0700 | [diff] [blame] | 180 | REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<2>({0,0}, {0,0}).val)); |
| 181 | REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<2>({1,1}, {0,0}).val)); |
| 182 | REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<2>({0,0}, {1,1}).val)); |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 183 | check_approx_angle_between_vectors(r, {0,0}, {0,0}); |
| 184 | check_approx_angle_between_vectors(r, {1,1}, {0,0}); |
| 185 | check_approx_angle_between_vectors(r, {0,0}, {1,1}); |
| 186 | |
| 187 | // Test infinities. |
Chris Dalton | 356cef3 | 2020-12-01 02:11:24 -0700 | [diff] [blame] | 188 | REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<2>( |
| 189 | {std::numeric_limits<float>::infinity(),1}, {2,3}).val)); |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 190 | |
| 191 | // Test NaNs. |
Chris Dalton | 356cef3 | 2020-12-01 02:11:24 -0700 | [diff] [blame] | 192 | REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<2>( |
| 193 | {std::numeric_limits<float>::quiet_NaN(),1}, {2,3}).val)); |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 194 | |
| 195 | // Test demorms. |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 196 | float epsilon = std::numeric_limits<float>::denorm_min(); |
Chris Dalton | 356cef3 | 2020-12-01 02:11:24 -0700 | [diff] [blame] | 197 | REPORTER_ASSERT(r, SkScalarNearlyZero(grvx::approx_angle_between_vectors<2>( |
| 198 | {epsilon, epsilon}, {epsilon, epsilon}).val)); |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 199 | |
| 200 | // Test random floats of all types. |
Chris Dalton | 4195bcc | 2020-11-02 12:12:52 -0700 | [diff] [blame] | 201 | uint4 mantissas = {0,0,0,0}; |
| 202 | uint4 exp = uint4{126, 127, 128, 129}; |
| 203 | for (uint32_t i = 0; i < (1 << 12); ++i) { |
Chris Dalton | 6bacd9f | 2020-11-02 16:12:12 -0700 | [diff] [blame] | 204 | // approx_angle_between_vectors is only valid for absolute values < 2^31. |
| 205 | uint4 exp_ = skvx::min(exp, 127 + 30); |
| 206 | uint32_t a=exp_[0], b=exp_[1], c=exp_[2], d=exp_[3]; |
| 207 | // approx_angle_between_vectors is only valid if at least one vector component's magnitude |
| 208 | // is >2^-31. |
| 209 | a = std::max(a, 127u - 30); |
| 210 | c = std::max(a, 127u - 30); |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 211 | // Run two tests where both components of both vectors have the same exponent, one where |
| 212 | // both components of a given vector have the same exponent, and one where all components of |
| 213 | // all vectors have different exponents. |
Chris Dalton | 6bacd9f | 2020-11-02 16:12:12 -0700 | [diff] [blame] | 214 | uint4 x0exp = uint4{a,c,a,a} << 23; |
| 215 | uint4 y0exp = uint4{a,c,a,b} << 23; |
| 216 | uint4 x1exp = uint4{a,c,c,c} << 23; |
| 217 | uint4 y1exp = uint4{a,c,c,d} << 23; |
Chris Dalton | 4195bcc | 2020-11-02 12:12:52 -0700 | [diff] [blame] | 218 | uint4 signs = uint4{i<<31, i<<30, i<<29, i<<28} & (1u<<31); |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 219 | float4 x0 = bit_pun<float4>(signs | x0exp | mantissas[0]); |
| 220 | float4 y0 = bit_pun<float4>(signs | y0exp | mantissas[1]); |
| 221 | float4 x1 = bit_pun<float4>(signs | x1exp | mantissas[2]); |
| 222 | float4 y1 = bit_pun<float4>(signs | y1exp | mantissas[3]); |
Chris Dalton | 356cef3 | 2020-12-01 02:11:24 -0700 | [diff] [blame] | 223 | float4 rads = approx_angle_between_vectors(skvx::join(x0, y0), skvx::join(x1, y1)); |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 224 | for (int j = 0; j < 4; ++j) { |
| 225 | if (!check_approx_angle_between_vectors(r, {x0[j], y0[j]}, {x1[j], y1[j]}, rads[j])) { |
| 226 | return; |
| 227 | } |
| 228 | } |
| 229 | // Adding primes makes sure we test every value before we repeat. |
Chris Dalton | 4195bcc | 2020-11-02 12:12:52 -0700 | [diff] [blame] | 230 | mantissas = (mantissas + uint4{123456791, 201345691, 198765433, 156789029}) & ((1<<23) - 1); |
| 231 | exp = (exp + uint4{79, 83, 199, 7}) & 0xff; |
Chris Dalton | c3cb099 | 2020-11-02 10:52:26 -0700 | [diff] [blame] | 232 | } |
| 233 | } |
Chris Dalton | d461f6e | 2020-11-23 13:51:06 -0700 | [diff] [blame] | 234 | |
| 235 | template<int N, typename T> void check_strided_loads(skiatest::Reporter* r) { |
| 236 | using Vec = skvx::Vec<N,T>; |
| 237 | T values[N*4]; |
| 238 | std::iota(values, values + N*4, 0); |
| 239 | Vec a, b, c, d; |
| 240 | grvx::strided_load2(values, a, b); |
| 241 | for (int i = 0; i < N; ++i) { |
| 242 | REPORTER_ASSERT(r, a[i] == values[i*2]); |
| 243 | REPORTER_ASSERT(r, b[i] == values[i*2 + 1]); |
| 244 | } |
| 245 | grvx::strided_load4(values, a, b, c, d); |
| 246 | for (int i = 0; i < N; ++i) { |
| 247 | REPORTER_ASSERT(r, a[i] == values[i*4]); |
| 248 | REPORTER_ASSERT(r, b[i] == values[i*4 + 1]); |
| 249 | REPORTER_ASSERT(r, c[i] == values[i*4 + 2]); |
| 250 | REPORTER_ASSERT(r, d[i] == values[i*4 + 3]); |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | template<typename T> void check_strided_loads(skiatest::Reporter* r) { |
| 255 | check_strided_loads<1,T>(r); |
| 256 | check_strided_loads<2,T>(r); |
| 257 | check_strided_loads<4,T>(r); |
| 258 | check_strided_loads<8,T>(r); |
| 259 | check_strided_loads<16,T>(r); |
| 260 | check_strided_loads<32,T>(r); |
| 261 | } |
| 262 | |
| 263 | DEF_TEST(GrVx_strided_loads, r) { |
| 264 | check_strided_loads<uint32_t>(r); |
| 265 | check_strided_loads<uint16_t>(r); |
| 266 | check_strided_loads<uint8_t>(r); |
| 267 | check_strided_loads<int32_t>(r); |
| 268 | check_strided_loads<int16_t>(r); |
| 269 | check_strided_loads<int8_t>(r); |
| 270 | check_strided_loads<float>(r); |
| 271 | } |