Reorganize to keep similar code together.
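This moves memset16, memset32, and rsqrt out of the SkOpts*.cpp files and into SK_OPTS_NS-namespaced headers in src/opts (SkUtils_opts.h and SkFloatingPoint_opts.h), the same way the blur and xfermode code is already organized (SkBlurImageFilter_opts.h, SkXfermode_opts.h). No functional change.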
BUG=skia:4117
R=djsollen@google.com
Review URL: https://codereview.chromium.org/1264423002 .
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index 14d28e1..815216b 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -7,8 +7,11 @@
#include "SkOnce.h"
#include "SkOpts.h"
+
#define SK_OPTS_NS portable
#include "SkBlurImageFilter_opts.h"
+#include "SkFloatingPoint_opts.h"
+#include "SkUtils_opts.h"
#include "SkXfermode_opts.h"
#if defined(SK_CPU_X86)
@@ -23,30 +26,14 @@
#include <cpu-features.h>
#endif
-namespace portable { // This helps identify methods from this file when debugging / profiling.
-
-static float rsqrt(float x) {
- // Get initial estimate.
- int i = *SkTCast<int*>(&x);
- i = 0x5F1FFFF9 - (i>>1);
- float estimate = *SkTCast<float*>(&i);
-
- // One step of Newton's method to refine.
- const float estimate_sq = estimate*estimate;
- estimate *= 0.703952253f*(2.38924456f-x*estimate_sq);
- return estimate;
-}
-
-template <typename T>
-static void memsetT(T dst[], T val, int n) { while (n --> 0) { *dst++ = val; } }
-
-} // namespace portable
-
namespace SkOpts {
// Define default function pointer values here...
+ // If our global compile options are set high enough, these 'portable' defaults might
+ // even be CPU-specialized, e.g. a typical x86-64 machine might start with SSE2 defaults.
+ // They'll still get a chance to be replaced with even better ones, e.g. using SSE4.1.
decltype(rsqrt) rsqrt = portable::rsqrt;
- decltype(memset16) memset16 = portable::memsetT<uint16_t>;
- decltype(memset32) memset32 = portable::memsetT<uint32_t>;
+ decltype(memset16) memset16 = portable::memset16;
+ decltype(memset32) memset32 = portable::memset32;
decltype(create_xfermode) create_xfermode = SkCreate4pxXfermode;
static const auto x = portable::kX, y = portable::kY;
diff --git a/src/opts/SkFloatingPoint_opts.h b/src/opts/SkFloatingPoint_opts.h
new file mode 100644
index 0000000..8b6536a
--- /dev/null
+++ b/src/opts/SkFloatingPoint_opts.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkFloatingPoint_opts_DEFINED
+#define SkFloatingPoint_opts_DEFINED
+
+#include "SkFloatingPoint.h"
+
+namespace SK_OPTS_NS {
+
+#if defined(SK_ARM_HAS_NEON)
+ static float rsqrt(float x) {  // Variant used when SK_ARM_HAS_NEON is defined: just forwards to sk_float_rsqrt().
+ return sk_float_rsqrt(x); // This sk_float_rsqrt copy will take the NEON compile-time path.
+ }
+#else
+ static float rsqrt(float x) {  // Variant used without NEON: approximates 1/sqrt(x) with an integer bit trick plus one refinement.
+ // Get initial estimate: read x's bits as an int, then subtract half of them from a magic constant.
+ int i = *SkTCast<int*>(&x);
+ i = 0x5F1FFFF9 - (i>>1);
+ float estimate = *SkTCast<float*>(&i);  // Read the adjusted bits back as a float.
+
+ // One step of Newton's method to refine.
+ const float estimate_sq = estimate*estimate;
+ estimate *= 0.703952253f*(2.38924456f-x*estimate_sq);  // Constants differ from the plain Newton 0.5 and 3 — presumably tuned for accuracy; verify.
+ return estimate;
+ }
+#endif
+
+} // namespace SK_OPTS_NS
+
+#endif//SkFloatingPoint_opts_DEFINED
diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkOpts_neon.cpp
index aa71827..2db976d 100644
--- a/src/opts/SkOpts_neon.cpp
+++ b/src/opts/SkOpts_neon.cpp
@@ -5,68 +5,14 @@
* found in the LICENSE file.
*/
-#include "SkFloatingPoint.h"
#include "SkOpts.h"
+
#define SK_OPTS_NS neon
#include "SkBlurImageFilter_opts.h"
+#include "SkFloatingPoint_opts.h"
+#include "SkUtils_opts.h"
#include "SkXfermode_opts.h"
-namespace neon { // This helps identify methods from this file when debugging / profiling.
-
-static float rsqrt(float x) {
- return sk_float_rsqrt(x); // This sk_float_rsqrt copy will take the NEON compile-time path.
-}
-
-static void memset16(uint16_t* dst, uint16_t value, int n) {
- uint16x8_t v8 = vdupq_n_u16(value);
- uint16x8x4_t v32 = {{ v8, v8, v8, v8 }};
-
- while (n >= 32) {
- vst4q_u16(dst, v32); // This swizzles, but we don't care: all lanes are the same, value.
- dst += 32;
- n -= 32;
- }
- switch (n / 8) {
- case 3: vst1q_u16(dst, v8); dst += 8;
- case 2: vst1q_u16(dst, v8); dst += 8;
- case 1: vst1q_u16(dst, v8); dst += 8;
- }
- if (n & 4) {
- vst1_u16(dst, vget_low_u16(v8));
- dst += 4;
- }
- switch (n & 3) {
- case 3: *dst++ = value;
- case 2: *dst++ = value;
- case 1: *dst = value;
- }
-}
-
-static void memset32(uint32_t* dst, uint32_t value, int n) {
- uint32x4_t v4 = vdupq_n_u32(value);
- uint32x4x4_t v16 = {{ v4, v4, v4, v4 }};
-
- while (n >= 16) {
- vst4q_u32(dst, v16); // This swizzles, but we don't care: all lanes are the same, value.
- dst += 16;
- n -= 16;
- }
- switch (n / 4) {
- case 3: vst1q_u32(dst, v4); dst += 4;
- case 2: vst1q_u32(dst, v4); dst += 4;
- case 1: vst1q_u32(dst, v4); dst += 4;
- }
- if (n & 2) {
- vst1_u32(dst, vget_low_u32(v4));
- dst += 2;
- }
- if (n & 1) {
- *dst = value;
- }
-}
-
-} // namespace neon
-
namespace SkOpts {
void Init_neon() {
rsqrt = neon::rsqrt;
diff --git a/src/opts/SkOpts_sse2.cpp b/src/opts/SkOpts_sse2.cpp
index 5b4d6d6..ef0f96a 100644
--- a/src/opts/SkOpts_sse2.cpp
+++ b/src/opts/SkOpts_sse2.cpp
@@ -9,48 +9,9 @@
#define SK_OPTS_NS sse2
#include "SkBlurImageFilter_opts.h"
+#include "SkUtils_opts.h"
#include "SkXfermode_opts.h"
-namespace sse2 { // This helps identify methods from this file when debugging / profiling.
-
-static void memset16(uint16_t* dst, uint16_t val, int n) {
- auto dst8 = (__m128i*)dst;
- auto val8 = _mm_set1_epi16(val);
- for ( ; n >= 8; n -= 8) {
- _mm_storeu_si128(dst8++, val8);
- }
- dst = (uint16_t*)dst8;
- if (n & 4) {
- _mm_storel_epi64((__m128i*)dst, val8);
- dst += 4;
- }
- if (n & 2) {
- *(uint32_t*)dst = _mm_cvtsi128_si32(val8);
- dst += 2;
- }
- if (n & 1) {
- *dst = val;
- }
-}
-
-static void memset32(uint32_t* dst, uint32_t val, int n) {
- auto dst4 = (__m128i*)dst;
- auto val4 = _mm_set1_epi32(val);
- for ( ; n >= 4; n -= 4) {
- _mm_storeu_si128(dst4++, val4);
- }
- dst = (uint32_t*)dst4;
- if (n & 2) {
- _mm_storel_epi64((__m128i*)dst, val4);
- dst += 2;
- }
- if (n & 1) {
- *dst = val;
- }
-}
-
-} // namespace sse2
-
namespace SkOpts {
void Init_sse2() {
memset16 = sse2::memset16;
diff --git a/src/opts/SkOpts_sse41.cpp b/src/opts/SkOpts_sse41.cpp
index 8e0500b..189810c 100644
--- a/src/opts/SkOpts_sse41.cpp
+++ b/src/opts/SkOpts_sse41.cpp
@@ -6,6 +6,7 @@
*/
#include "SkOpts.h"
+
#define SK_OPTS_NS sse41
#include "SkBlurImageFilter_opts.h"
diff --git a/src/opts/SkUtils_opts.h b/src/opts/SkUtils_opts.h
new file mode 100644
index 0000000..44fe643
--- /dev/null
+++ b/src/opts/SkUtils_opts.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkUtils_opts_DEFINED
+#define SkUtils_opts_DEFINED
+
+namespace SK_OPTS_NS {
+
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
+
+static void memset16(uint16_t* dst, uint16_t val, int n) {  // SSE2: writes val to dst[0..n). Assumes n >= 0 — a negative n would still take the tail branches; confirm callers.
+ auto dst8 = (__m128i*)dst;
+ auto val8 = _mm_set1_epi16(val);  // val copied into all eight 16-bit lanes.
+ for ( ; n >= 8; n -= 8) {
+ _mm_storeu_si128(dst8++, val8);  // Unaligned 128-bit store: 8 values per iteration.
+ }
+ dst = (uint16_t*)dst8;
+ if (n & 4) {  // Fewer than 8 remain; bits 2, 1 and 0 of n pick the 4-, 2- and 1-value tails.
+ _mm_storel_epi64((__m128i*)dst, val8);  // Low 64 bits: 4 values.
+ dst += 4;
+ }
+ if (n & 2) {
+ *(uint32_t*)dst = _mm_cvtsi128_si32(val8);  // Low 32 bits: 2 values. NOTE(review): 32-bit store through a uint16_t* — confirm alignment/aliasing is acceptable.
+ dst += 2;
+ }
+ if (n & 1) {
+ *dst = val;
+ }
+}
+
+static void memset32(uint32_t* dst, uint32_t val, int n) {  // SSE2: writes val to dst[0..n). Assumes n >= 0 — a negative n would still take the tail branches; confirm callers.
+ auto dst4 = (__m128i*)dst;
+ auto val4 = _mm_set1_epi32(val);  // val copied into all four 32-bit lanes.
+ for ( ; n >= 4; n -= 4) {
+ _mm_storeu_si128(dst4++, val4);  // Unaligned 128-bit store: 4 values per iteration.
+ }
+ dst = (uint32_t*)dst4;
+ if (n & 2) {  // Fewer than 4 remain; bits 1 and 0 of n pick the 2- and 1-value tails.
+ _mm_storel_epi64((__m128i*)dst, val4);  // Low 64 bits: 2 values.
+ dst += 2;
+ }
+ if (n & 1) {
+ *dst = val;
+ }
+}
+
+#elif defined(SK_ARM_HAS_NEON)
+
+static void memset16(uint16_t* dst, uint16_t value, int n) {  // NEON: writes value to dst[0..n). Assumes n >= 0 — a negative n can still take the tail branches; confirm callers.
+ uint16x8_t v8 = vdupq_n_u16(value);  // value copied into all eight 16-bit lanes.
+ uint16x8x4_t v32 = {{ v8, v8, v8, v8 }};  // Four such vectors: 32 values per vst4q store.
+
+ while (n >= 32) {
+ vst4q_u16(dst, v32); // This swizzles, but we don't care: all lanes are the same, value.
+ dst += 32;
+ n -= 32;
+ }
+ switch (n / 8) {  // Fewer than 32 remain. Cases fall through on purpose: 3, 2 or 1 stores of 8 values.
+ case 3: vst1q_u16(dst, v8); dst += 8;
+ case 2: vst1q_u16(dst, v8); dst += 8;
+ case 1: vst1q_u16(dst, v8); dst += 8;
+ }
+ if (n & 4) {
+ vst1_u16(dst, vget_low_u16(v8));  // Low half of v8: 4 values.
+ dst += 4;
+ }
+ switch (n & 3) {  // Last 0-3 values, one scalar store each; cases fall through on purpose.
+ case 3: *dst++ = value;
+ case 2: *dst++ = value;
+ case 1: *dst = value;
+ }
+}
+
+static void memset32(uint32_t* dst, uint32_t value, int n) {  // NEON: writes value to dst[0..n). Assumes n >= 0 — a negative n can still take the tail branches; confirm callers.
+ uint32x4_t v4 = vdupq_n_u32(value);  // value copied into all four 32-bit lanes.
+ uint32x4x4_t v16 = {{ v4, v4, v4, v4 }};  // Four such vectors: 16 values per vst4q store.
+
+ while (n >= 16) {
+ vst4q_u32(dst, v16); // This swizzles, but we don't care: all lanes are the same, value.
+ dst += 16;
+ n -= 16;
+ }
+ switch (n / 4) {  // Fewer than 16 remain. Cases fall through on purpose: 3, 2 or 1 stores of 4 values.
+ case 3: vst1q_u32(dst, v4); dst += 4;
+ case 2: vst1q_u32(dst, v4); dst += 4;
+ case 1: vst1q_u32(dst, v4); dst += 4;
+ }
+ if (n & 2) {
+ vst1_u32(dst, vget_low_u32(v4));  // Low half of v4: 2 values.
+ dst += 2;
+ }
+ if (n & 1) {
+ *dst = value;
+ }
+}
+
+#else // Neither NEON nor SSE2.
+
+static void memset16(uint16_t* dst, uint16_t val, int n) { while (n --> 0) { *dst++ = val; } }  // Scalar fallback: (n--) > 0 loop, one store per value; writes nothing when n <= 0.
+static void memset32(uint32_t* dst, uint32_t val, int n) { while (n --> 0) { *dst++ = val; } }  // Scalar fallback: (n--) > 0 loop, one store per value; writes nothing when n <= 0.
+
+#endif
+
+} // namespace SK_OPTS_NS
+
+#endif//SkUtils_opts_DEFINED