Add sqrt() and rsqrt() to Sk4f.
This doesn't add them to the second-stringer Sk4i. It's unclear we should be
doing that often, and we don't have efficient ways to do it except via floats.
BUG=skia:
Review URL: https://codereview.chromium.org/964603002
diff --git a/src/core/Sk4x.h b/src/core/Sk4x.h
index 058c400..b01b6f1 100644
--- a/src/core/Sk4x.h
+++ b/src/core/Sk4x.h
@@ -50,6 +50,9 @@
Sk4x multiply(const Sk4x&) const;
Sk4x divide(const Sk4x&) const;
+ Sk4x rsqrt() const; // Approximate reciprocal sqrt(), i.e. roughly 1/sqrt(x).
+ Sk4x sqrt() const; // Precise sqrt(); this->multiply(this->rsqrt()) may be faster, but less precise.
+
Sk4i equal(const Sk4x&) const;
Sk4i notEqual(const Sk4x&) const;
Sk4i lessThan(const Sk4x&) const;
diff --git a/src/core/Sk4x_portable.h b/src/core/Sk4x_portable.h
index bd056c7..440e91f 100644
--- a/src/core/Sk4x_portable.h
+++ b/src/core/Sk4x_portable.h
@@ -2,6 +2,8 @@
// This file will be intentionally included three times.
#if defined(SK4X_PREAMBLE)
+ #include "SkFloatingPoint.h"
+ #include <math.h>
#elif defined(SK4X_PRIVATE)
typedef T Type;
@@ -60,6 +62,20 @@
M(Sk4x<T>) divide(const Sk4x<T>& other) const { return Sk4x(BINOP(/)); }
#undef BINOP
+template<> inline Sk4f Sk4f::rsqrt() const {  // Portable path: sk_float_rsqrt() on each of the four lanes.
+ return Sk4f(sk_float_rsqrt(fVec[0]),
+ sk_float_rsqrt(fVec[1]),
+ sk_float_rsqrt(fVec[2]),
+ sk_float_rsqrt(fVec[3]));
+}
+
+template<> inline Sk4f Sk4f::sqrt() const {  // Portable path: sqrtf() on each of the four lanes.
+ return Sk4f(sqrtf(fVec[0]),
+ sqrtf(fVec[1]),
+ sqrtf(fVec[2]),
+ sqrtf(fVec[3]));
+}
+
#define BOOL_BINOP(op) fVec[0] op other.fVec[0] ? -1 : 0, \
fVec[1] op other.fVec[1] ? -1 : 0, \
fVec[2] op other.fVec[2] ? -1 : 0, \
diff --git a/src/core/Sk4x_sse.h b/src/core/Sk4x_sse.h
index ee09f77..6077d02 100644
--- a/src/core/Sk4x_sse.h
+++ b/src/core/Sk4x_sse.h
@@ -99,6 +99,9 @@
M(Sk4f) multiply(const Sk4f& o) const { return _mm_mul_ps(fVec, o.fVec); }
M(Sk4f) divide (const Sk4f& o) const { return _mm_div_ps(fVec, o.fVec); }
+M(Sk4f) rsqrt() const { return _mm_rsqrt_ps(fVec); }  // Approximate reciprocal sqrt of all lanes in one instruction.
+M(Sk4f) sqrt() const { return _mm_sqrt_ps( fVec); }  // Square root of all lanes in one instruction.
+
M(Sk4i) equal (const Sk4f& o) const { return _mm_cmpeq_ps (fVec, o.fVec); }
M(Sk4i) notEqual (const Sk4f& o) const { return _mm_cmpneq_ps(fVec, o.fVec); }
M(Sk4i) lessThan (const Sk4f& o) const { return _mm_cmplt_ps (fVec, o.fVec); }
diff --git a/tests/Sk4xTest.cpp b/tests/Sk4xTest.cpp
index 0985c3b..23eaf68 100644
--- a/tests/Sk4xTest.cpp
+++ b/tests/Sk4xTest.cpp
@@ -87,6 +87,18 @@
ASSERT_EQ(Sk4f(2,4,6,8), Sk4f(1,2,3,4).multiply(2.0f));
}
+DEF_TEST(Sk4x_Sqrt, r) {  // Checks sqrt() exactly and rsqrt() approximately on perfect squares.
+ Sk4f squares(4, 16, 25, 121),
+ roots(2, 4, 5, 11);
+ // .sqrt() should be pretty precise: perfect squares must yield their exact roots.
+ ASSERT_EQ(roots, squares.sqrt());
+
+ // .rsqrt() is only approximate and may err in either direction (or be exact), so bound |error|.
+ Sk4f error = roots.subtract(squares.multiply(squares.rsqrt()));  // root - x * (1/sqrt(x))
+ REPORTER_ASSERT(r, error.greaterThan(-0.01f).allTrue());
+ REPORTER_ASSERT(r, error.lessThan(0.01f).allTrue());
+}
+
DEF_TEST(Sk4x_Comparison, r) {
ASSERT_EQ(Sk4f(1,2,3,4), Sk4f(1,2,3,4));
ASSERT_NE(Sk4f(4,3,2,1), Sk4f(1,2,3,4));