mtklein | c9adb05 | 2015-03-30 10:50:27 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2015 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #ifndef SkNx_sse_DEFINED |
| 9 | #define SkNx_sse_DEFINED |
| 10 | |
| 11 | // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. |
| 12 | #include <immintrin.h> |
| 13 | |
| 14 | template <> |
| 15 | class SkNi<2, int32_t> { |
| 16 | public: |
| 17 | SkNi(const __m128i& vec) : fVec(vec) {} |
| 18 | |
| 19 | SkNi() {} |
| 20 | bool allTrue() const { return 0xff == (_mm_movemask_epi8(fVec) & 0xff); } |
| 21 | bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(fVec) & 0xff); } |
| 22 | |
| 23 | private: |
| 24 | __m128i fVec; |
| 25 | }; |
| 26 | |
| 27 | template <> |
| 28 | class SkNi<4, int32_t> { |
| 29 | public: |
| 30 | SkNi(const __m128i& vec) : fVec(vec) {} |
| 31 | |
| 32 | SkNi() {} |
| 33 | bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } |
| 34 | bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } |
| 35 | |
| 36 | private: |
| 37 | __m128i fVec; |
| 38 | }; |
| 39 | |
| 40 | template <> |
| 41 | class SkNi<2, int64_t> { |
| 42 | public: |
| 43 | SkNi(const __m128i& vec) : fVec(vec) {} |
| 44 | |
| 45 | SkNi() {} |
| 46 | bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } |
| 47 | bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } |
| 48 | |
| 49 | private: |
| 50 | __m128i fVec; |
| 51 | }; |
| 52 | |
| 53 | |
| 54 | template <> |
| 55 | class SkNf<2, float> { |
| 56 | typedef SkNi<2, int32_t> Ni; |
| 57 | public: |
| 58 | SkNf(const __m128& vec) : fVec(vec) {} |
| 59 | |
| 60 | SkNf() {} |
| 61 | explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {} |
| 62 | static SkNf Load(const float vals[2]) { |
| 63 | return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals)); |
| 64 | } |
| 65 | SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} |
| 66 | |
| 67 | void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); } |
| 68 | |
| 69 | SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } |
| 70 | SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } |
| 71 | SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } |
| 72 | SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } |
| 73 | |
| 74 | Ni operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps (fVec, o.fVec)); } |
| 75 | Ni operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps(fVec, o.fVec)); } |
| 76 | Ni operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps (fVec, o.fVec)); } |
| 77 | Ni operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps (fVec, o.fVec)); } |
| 78 | Ni operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps (fVec, o.fVec)); } |
| 79 | Ni operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps (fVec, o.fVec)); } |
| 80 | |
| 81 | static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.fVec); } |
| 82 | static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); } |
| 83 | |
| 84 | SkNf sqrt() const { return _mm_sqrt_ps (fVec); } |
| 85 | SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); } |
| 86 | |
| 87 | SkNf invert() const { return SkNf(1) / *this; } |
| 88 | SkNf approxInvert() const { return _mm_rcp_ps(fVec); } |
| 89 | |
mtklein | a156a8f | 2015-04-03 06:16:13 -0700 | [diff] [blame^] | 90 | template <int k> float kth() const { |
mtklein | c9adb05 | 2015-03-30 10:50:27 -0700 | [diff] [blame] | 91 | SkASSERT(0 <= k && k < 2); |
| 92 | union { __m128 v; float fs[4]; } pun = {fVec}; |
mtklein | a156a8f | 2015-04-03 06:16:13 -0700 | [diff] [blame^] | 93 | return pun.fs[k&1]; |
mtklein | c9adb05 | 2015-03-30 10:50:27 -0700 | [diff] [blame] | 94 | } |
| 95 | |
| 96 | private: |
| 97 | __m128 fVec; |
| 98 | }; |
| 99 | |
| 100 | template <> |
| 101 | class SkNf<2, double> { |
| 102 | typedef SkNi<2, int64_t> Ni; |
| 103 | public: |
| 104 | SkNf(const __m128d& vec) : fVec(vec) {} |
| 105 | |
| 106 | SkNf() {} |
| 107 | explicit SkNf(double val) : fVec( _mm_set1_pd(val) ) {} |
| 108 | static SkNf Load(const double vals[2]) { return _mm_loadu_pd(vals); } |
| 109 | SkNf(double a, double b) : fVec(_mm_setr_pd(a,b)) {} |
| 110 | |
| 111 | void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); } |
| 112 | |
| 113 | SkNf operator + (const SkNf& o) const { return _mm_add_pd(fVec, o.fVec); } |
| 114 | SkNf operator - (const SkNf& o) const { return _mm_sub_pd(fVec, o.fVec); } |
| 115 | SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); } |
| 116 | SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); } |
| 117 | |
| 118 | Ni operator == (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpeq_pd (fVec, o.fVec)); } |
| 119 | Ni operator != (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpneq_pd(fVec, o.fVec)); } |
| 120 | Ni operator < (const SkNf& o) const { return _mm_castpd_si128(_mm_cmplt_pd (fVec, o.fVec)); } |
| 121 | Ni operator > (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpgt_pd (fVec, o.fVec)); } |
| 122 | Ni operator <= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmple_pd (fVec, o.fVec)); } |
| 123 | Ni operator >= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpge_pd (fVec, o.fVec)); } |
| 124 | |
| 125 | static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r.fVec); } |
| 126 | static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.fVec); } |
| 127 | |
| 128 | SkNf sqrt() const { return _mm_sqrt_pd(fVec); } |
| 129 | SkNf rsqrt() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); } |
| 130 | |
| 131 | SkNf invert() const { return SkNf(1) / *this; } |
| 132 | SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec))); } |
| 133 | |
mtklein | a156a8f | 2015-04-03 06:16:13 -0700 | [diff] [blame^] | 134 | template <int k> double kth() const { |
mtklein | c9adb05 | 2015-03-30 10:50:27 -0700 | [diff] [blame] | 135 | SkASSERT(0 <= k && k < 2); |
| 136 | union { __m128d v; double ds[2]; } pun = {fVec}; |
mtklein | a156a8f | 2015-04-03 06:16:13 -0700 | [diff] [blame^] | 137 | return pun.ds[k&1]; |
mtklein | c9adb05 | 2015-03-30 10:50:27 -0700 | [diff] [blame] | 138 | } |
| 139 | |
| 140 | private: |
| 141 | __m128d fVec; |
| 142 | }; |
| 143 | |
| 144 | template <> |
| 145 | class SkNf<4, float> { |
| 146 | typedef SkNi<4, int32_t> Ni; |
| 147 | public: |
| 148 | SkNf(const __m128& vec) : fVec(vec) {} |
| 149 | __m128 vec() const { return fVec; } |
| 150 | |
| 151 | SkNf() {} |
| 152 | explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} |
| 153 | static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } |
| 154 | SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} |
| 155 | |
| 156 | void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } |
| 157 | |
| 158 | SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } |
| 159 | SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } |
| 160 | SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } |
| 161 | SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } |
| 162 | |
| 163 | Ni operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps (fVec, o.fVec)); } |
| 164 | Ni operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps(fVec, o.fVec)); } |
| 165 | Ni operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps (fVec, o.fVec)); } |
| 166 | Ni operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps (fVec, o.fVec)); } |
| 167 | Ni operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps (fVec, o.fVec)); } |
| 168 | Ni operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps (fVec, o.fVec)); } |
| 169 | |
| 170 | static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.fVec); } |
| 171 | static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); } |
| 172 | |
| 173 | SkNf sqrt() const { return _mm_sqrt_ps (fVec); } |
| 174 | SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); } |
| 175 | |
| 176 | SkNf invert() const { return SkNf(1) / *this; } |
| 177 | SkNf approxInvert() const { return _mm_rcp_ps(fVec); } |
| 178 | |
mtklein | a156a8f | 2015-04-03 06:16:13 -0700 | [diff] [blame^] | 179 | template <int k> float kth() const { |
mtklein | c9adb05 | 2015-03-30 10:50:27 -0700 | [diff] [blame] | 180 | SkASSERT(0 <= k && k < 4); |
| 181 | union { __m128 v; float fs[4]; } pun = {fVec}; |
mtklein | a156a8f | 2015-04-03 06:16:13 -0700 | [diff] [blame^] | 182 | return pun.fs[k&3]; |
mtklein | c9adb05 | 2015-03-30 10:50:27 -0700 | [diff] [blame] | 183 | } |
| 184 | |
| 185 | private: |
| 186 | __m128 fVec; |
| 187 | }; |
| 188 | |
| 189 | |
| 190 | #endif//SkNx_sse_DEFINED |