Revert "standardize macro checks in SkRasterPipeline_opts"
This reverts commit 75d25c4c8f0cb61770faef77a9a24f3392316dfb.
Reason for revert: subtly broken; going to give this a redo.
Original change's description:
> standardize macro checks in SkRasterPipeline_opts
>
> We mostly check JUMPER_IS_FOO macros, but in a few places we are
> checking the raw __FOO__ compiler macros instead. If only for clarity,
> switch to JUMPER_IS_FOO.
>
> This is also a step towards me being able to control the instruction
> set we choose without having to #define these protected __FOO__ macros.
>
> Change-Id: Ieea2090ff658399e27746e0bb8ce950b06f9efb8
> Reviewed-on: https://skia-review.googlesource.com/150961
> Commit-Queue: Brian Osman <brianosman@google.com>
> Auto-Submit: Mike Klein <mtklein@google.com>
> Reviewed-by: Brian Osman <brianosman@google.com>
TBR=mtklein@google.com,brianosman@google.com
Change-Id: Ifbf5b6f51a29ad4e02e8ca311e449c13cc3ed19c
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/150964
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
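For context on what is being reverted: the change swapped raw compiler macros for the JUMPER_IS_* names that SkRasterPipeline_opts.h derives from them. A minimal sketch of that mapping, assuming the usual x86 macros (the file's real definitions carry more conditions than this):

    // Hedged sketch, not Skia's exact definitions: choose one JUMPER_IS_*
    // name from the raw instruction-set macros, then test only that name.
    #if defined(__AVX2__)
        #define JUMPER_IS_HSW
    #elif defined(__SSE4_1__)
        #define JUMPER_IS_SSE41
    #elif defined(__SSE2__)
        #define JUMPER_IS_SSE2
    #endif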
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index fabd5cb..58874d4 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -2191,7 +2191,7 @@
#else // We are compiling vector code with Clang... let's make some lowp stages!
-#if defined(JUMPER_IS_HSW)
+#if defined(__AVX2__)
using U8 = uint8_t __attribute__((ext_vector_type(16)));
using U16 = uint16_t __attribute__((ext_vector_type(16)));
using I16 = int16_t __attribute__((ext_vector_type(16)));
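These type aliases use Clang's ext_vector_type extension, which is what lets the lowp stages read like scalar code. A standalone demo of the behavior it provides (names hypothetical):

    #include <cstdint>
    using U16x16 = uint16_t __attribute__((ext_vector_type(16)));
    // Arithmetic is lane-wise and scalars broadcast, so one expression
    // computes all 16 lanes. Safe here because lowp keeps 8-bit data in
    // 16-bit lanes, leaving headroom for the add.
    static U16x16 average(U16x16 a, U16x16 b) {
        return (a + b + 1) >> 1;   // rounding average, 16 lanes at once
    }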
@@ -2417,7 +2417,7 @@
SI U32 trunc_(F x) { return (U32)cast<I32>(x); }
SI F rcp(F x) {
-#if defined(JUMPER_IS_HSW)
+#if defined(__AVX2__)
__m256 lo,hi;
split(x, &lo,&hi);
return join<F>(_mm256_rcp_ps(lo), _mm256_rcp_ps(hi));
@@ -2438,7 +2438,7 @@
#endif
}
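A note on the intrinsic above: _mm256_rcp_ps() is only an ~12-bit approximation, unlike _mm256_sqrt_ps() below, which is correctly rounded. Where more precision is needed, the standard fix is one Newton-Raphson step; a hedged sketch, not something this function does:

    #include <immintrin.h>
    static __m256 rcp_refined(__m256 x) {
        __m256 r = _mm256_rcp_ps(x);               // ~12-bit estimate
        // One Newton-Raphson step, r' = r*(2 - x*r), roughly doubles
        // the number of correct bits.
        return _mm256_mul_ps(r, _mm256_sub_ps(_mm256_set1_ps(2.0f),
                                              _mm256_mul_ps(x, r)));
    }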
SI F sqrt_(F x) {
-#if defined(JUMPER_IS_HSW)
+#if defined(__AVX2__)
__m256 lo,hi;
split(x, &lo,&hi);
return join<F>(_mm256_sqrt_ps(lo), _mm256_sqrt_ps(hi));
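Both rcp() and sqrt_() lean on the same split/join idiom: a 16-float lowp vector is wider than one AVX register, so it is processed as two 256-bit halves. A self-contained sketch, assuming memcpy-based split/join like the real helpers:

    #include <immintrin.h>
    #include <cstring>
    using F16 = float __attribute__((ext_vector_type(16)));
    static F16 sqrt_sketch(F16 x) {
        __m256 lo, hi;                         // split: copy out each half
        memcpy(&lo, (const char*)&x +  0, sizeof(lo));
        memcpy(&hi, (const char*)&x + 32, sizeof(hi));
        lo = _mm256_sqrt_ps(lo);
        hi = _mm256_sqrt_ps(hi);
        F16 r;                                 // join: copy both halves back
        memcpy((char*)&r +  0, &lo, sizeof(lo));
        memcpy((char*)&r + 32, &hi, sizeof(hi));
        return r;
    }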
@@ -2473,11 +2473,11 @@
float32x4_t lo,hi;
split(x, &lo,&hi);
return join<F>(vrndmq_f32(lo), vrndmq_f32(hi));
-#elif defined(JUMPER_IS_HSW)
+#elif defined(__AVX2__)
__m256 lo,hi;
split(x, &lo,&hi);
return join<F>(_mm256_floor_ps(lo), _mm256_floor_ps(hi));
-#elif defined(JUMPER_IS_SSE41)
+#elif defined(__SSE4_1__)
__m128 lo,hi;
split(x, &lo,&hi);
return join<F>(_mm_floor_ps(lo), _mm_floor_ps(hi));
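When none of these branches apply, floor_() bottoms out in a portable truncate-and-correct fallback. A scalar sketch of the idea, assuming |x| fits in int range:

    static float floor_sketch(float x) {
        float roundtrip = (float)(int)x;       // truncates toward zero
        return roundtrip - (roundtrip > x ? 1.0f : 0.0f);  // fix negatives
    }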
@@ -2666,7 +2666,7 @@
V v = 0;
switch (tail & (N-1)) {
case 0: memcpy(&v, ptr, sizeof(v)); break;
- #if defined(JUMPER_IS_HSW)
+ #if defined(__AVX2__)
case 15: v[14] = ptr[14];
case 14: v[13] = ptr[13];
case 13: v[12] = ptr[12];
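The switch here is a fall-through ladder: tail is the number of lanes left in the final partial run (0 means a full vector), so each case copies its lane and falls into the case below. A 4-lane sketch of the same pattern:

    #include <cstdint>
    #include <cstring>
    using U32x4 = uint32_t __attribute__((ext_vector_type(4)));
    static U32x4 load_tail(const uint32_t* ptr, size_t tail) {
        U32x4 v = 0;
        switch (tail & 3) {
            case 0: memcpy(&v, ptr, sizeof(v)); break;  // full 4-lane load
            case 3: v[2] = ptr[2]; [[fallthrough]];
            case 2: v[1] = ptr[1]; [[fallthrough]];
            case 1: v[0] = ptr[0];
        }
        return v;
    }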
@@ -2690,7 +2690,7 @@
SI void store(T* ptr, size_t tail, V v) {
switch (tail & (N-1)) {
case 0: memcpy(ptr, &v, sizeof(v)); break;
- #if defined(JUMPER_IS_HSW)
+ #if defined(__AVX2__)
case 15: ptr[14] = v[14];
case 14: ptr[13] = v[13];
case 13: ptr[12] = v[12];
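store() is the mirror image: the same ladder, writing lanes out instead of reading them in. Under the same 4-lane assumptions as the load sketch above:

    static void store_tail(uint32_t* ptr, size_t tail, U32x4 v) {
        switch (tail & 3) {
            case 0: memcpy(ptr, &v, sizeof(v)); break;  // full 4-lane store
            case 3: ptr[2] = v[2]; [[fallthrough]];
            case 2: ptr[1] = v[1]; [[fallthrough]];
            case 1: ptr[0] = v[0];
        }
    }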
@@ -2710,7 +2710,7 @@
}
}
-#if defined(JUMPER_IS_HSW)
+#if defined(__AVX2__)
template <typename V, typename T>
SI V gather(const T* ptr, U32 ix) {
return V{ ptr[ix[ 0]], ptr[ix[ 1]], ptr[ix[ 2]], ptr[ix[ 3]],
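Each lane of this gather is an independent indexed load; writing it as per-lane subscripts lets Clang pick whatever instructions fit. A 4-lane sketch:

    using U32x4 = uint32_t __attribute__((ext_vector_type(4)));
    using F32x4 = float    __attribute__((ext_vector_type(4)));
    static F32x4 gather4(const float* ptr, U32x4 ix) {
        // One independent indexed load per lane.
        return F32x4{ ptr[ix[0]], ptr[ix[1]], ptr[ix[2]], ptr[ix[3]] };
    }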
@@ -2748,7 +2748,7 @@
// ~~~~~~ 32-bit memory loads and stores ~~~~~~ //
SI void from_8888(U32 rgba, U16* r, U16* g, U16* b, U16* a) {
-#if 1 && defined(JUMPER_IS_HSW)
+#if 1 && defined(__AVX2__)
// Swap the middle 128-bit lanes to make _mm256_packus_epi32() in cast_U16() work out nicely.
__m256i _01,_23;
split(rgba, &_01, &_23);
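The comment in this hunk is the subtle part: _mm256_packus_epi32() packs within each 128-bit lane, so packing two vectors yields a0..3, b0..3, a4..7, b4..7 unless the middle lanes are swapped first. A hedged sketch of swap-then-pack, not the file's exact code:

    #include <immintrin.h>
    static __m256i pack_u32_to_u16(__m256i lo, __m256i hi) {
        // a = [lo.lane0, hi.lane0], b = [lo.lane1, hi.lane1]
        __m256i a = _mm256_permute2x128_si256(lo, hi, 0x20),
                b = _mm256_permute2x128_si256(lo, hi, 0x31);
        // The per-lane pack now lands elements in plain 0..15 order.
        return _mm256_packus_epi32(a, b);
    }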
@@ -3084,7 +3084,7 @@
U16* r, U16* g, U16* b, U16* a) {
F fr, fg, fb, fa, br, bg, bb, ba;
-#if defined(JUMPER_IS_HSW)
+#if defined(__AVX2__)
if (c->stopCount <=8) {
__m256i lo, hi;
split(idx, &lo, &hi);
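This last hunk is truncated here, but the shape of the fast path it guards is: with at most 8 gradient stops, each per-channel coefficient array fits in one __m256, so the per-lane table lookup can be an in-register shuffle rather than a memory gather. A hedged sketch with assumed names and layout, not the file's exact code:

    #include <immintrin.h>
    static __m256 lookup8(const float coeffs[8], __m256i idx) {
        // Each of idx's 8 lanes selects one of the 8 coefficients.
        return _mm256_permutevar8x32_ps(_mm256_loadu_ps(coeffs), idx);
    }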