blob: cae2d458f66b968621ee78d7abd2bbc01b23a2fc [file] [log] [blame]
mtkleinc9adb052015-03-30 10:50:27 -07001/*
2 * Copyright 2015 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkNx_sse_DEFINED
9#define SkNx_sse_DEFINED
10
// This file may assume that SSE2 and all earlier SSE instruction sets are available,
// but must check SK_CPU_SSE_LEVEL before using anything more recent.
12#include <immintrin.h>
13
14template <>
15class SkNi<2, int32_t> {
16public:
17 SkNi(const __m128i& vec) : fVec(vec) {}
18
19 SkNi() {}
20 bool allTrue() const { return 0xff == (_mm_movemask_epi8(fVec) & 0xff); }
21 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(fVec) & 0xff); }
22
23private:
24 __m128i fVec;
25};
26
27template <>
28class SkNi<4, int32_t> {
29public:
30 SkNi(const __m128i& vec) : fVec(vec) {}
31
32 SkNi() {}
33 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); }
34 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); }
35
36private:
37 __m128i fVec;
38};
39
40template <>
41class SkNi<2, int64_t> {
42public:
43 SkNi(const __m128i& vec) : fVec(vec) {}
44
45 SkNi() {}
46 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); }
47 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); }
48
49private:
50 __m128i fVec;
51};
52
53
54template <>
55class SkNf<2, float> {
56 typedef SkNi<2, int32_t> Ni;
57public:
58 SkNf(const __m128& vec) : fVec(vec) {}
59
60 SkNf() {}
61 explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {}
62 static SkNf Load(const float vals[2]) {
63 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals));
64 }
65 SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {}
66
67 void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); }
68
69 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); }
70 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); }
71 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); }
72 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); }
73
74 Ni operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps (fVec, o.fVec)); }
75 Ni operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps(fVec, o.fVec)); }
76 Ni operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps (fVec, o.fVec)); }
77 Ni operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps (fVec, o.fVec)); }
78 Ni operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps (fVec, o.fVec)); }
79 Ni operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps (fVec, o.fVec)); }
80
81 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.fVec); }
82 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); }
83
84 SkNf sqrt() const { return _mm_sqrt_ps (fVec); }
85 SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); }
86
87 SkNf invert() const { return SkNf(1) / *this; }
88 SkNf approxInvert() const { return _mm_rcp_ps(fVec); }
89
mtkleina156a8f2015-04-03 06:16:13 -070090 template <int k> float kth() const {
mtkleinc9adb052015-03-30 10:50:27 -070091 SkASSERT(0 <= k && k < 2);
92 union { __m128 v; float fs[4]; } pun = {fVec};
mtkleina156a8f2015-04-03 06:16:13 -070093 return pun.fs[k&1];
mtkleinc9adb052015-03-30 10:50:27 -070094 }
95
96private:
97 __m128 fVec;
98};
99
100template <>
101class SkNf<2, double> {
102 typedef SkNi<2, int64_t> Ni;
103public:
104 SkNf(const __m128d& vec) : fVec(vec) {}
105
106 SkNf() {}
107 explicit SkNf(double val) : fVec( _mm_set1_pd(val) ) {}
108 static SkNf Load(const double vals[2]) { return _mm_loadu_pd(vals); }
109 SkNf(double a, double b) : fVec(_mm_setr_pd(a,b)) {}
110
111 void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); }
112
113 SkNf operator + (const SkNf& o) const { return _mm_add_pd(fVec, o.fVec); }
114 SkNf operator - (const SkNf& o) const { return _mm_sub_pd(fVec, o.fVec); }
115 SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); }
116 SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); }
117
118 Ni operator == (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpeq_pd (fVec, o.fVec)); }
119 Ni operator != (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpneq_pd(fVec, o.fVec)); }
120 Ni operator < (const SkNf& o) const { return _mm_castpd_si128(_mm_cmplt_pd (fVec, o.fVec)); }
121 Ni operator > (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpgt_pd (fVec, o.fVec)); }
122 Ni operator <= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmple_pd (fVec, o.fVec)); }
123 Ni operator >= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpge_pd (fVec, o.fVec)); }
124
125 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r.fVec); }
126 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.fVec); }
127
128 SkNf sqrt() const { return _mm_sqrt_pd(fVec); }
129 SkNf rsqrt() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); }
130
131 SkNf invert() const { return SkNf(1) / *this; }
132 SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec))); }
133
mtkleina156a8f2015-04-03 06:16:13 -0700134 template <int k> double kth() const {
mtkleinc9adb052015-03-30 10:50:27 -0700135 SkASSERT(0 <= k && k < 2);
136 union { __m128d v; double ds[2]; } pun = {fVec};
mtkleina156a8f2015-04-03 06:16:13 -0700137 return pun.ds[k&1];
mtkleinc9adb052015-03-30 10:50:27 -0700138 }
139
140private:
141 __m128d fVec;
142};
143
144template <>
145class SkNf<4, float> {
146 typedef SkNi<4, int32_t> Ni;
147public:
148 SkNf(const __m128& vec) : fVec(vec) {}
149 __m128 vec() const { return fVec; }
150
151 SkNf() {}
152 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {}
153 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); }
154 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {}
155
156 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); }
157
158 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); }
159 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); }
160 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); }
161 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); }
162
163 Ni operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps (fVec, o.fVec)); }
164 Ni operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps(fVec, o.fVec)); }
165 Ni operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps (fVec, o.fVec)); }
166 Ni operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps (fVec, o.fVec)); }
167 Ni operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps (fVec, o.fVec)); }
168 Ni operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps (fVec, o.fVec)); }
169
170 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.fVec); }
171 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); }
172
173 SkNf sqrt() const { return _mm_sqrt_ps (fVec); }
174 SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); }
175
176 SkNf invert() const { return SkNf(1) / *this; }
177 SkNf approxInvert() const { return _mm_rcp_ps(fVec); }
178
mtkleina156a8f2015-04-03 06:16:13 -0700179 template <int k> float kth() const {
mtkleinc9adb052015-03-30 10:50:27 -0700180 SkASSERT(0 <= k && k < 4);
181 union { __m128 v; float fs[4]; } pun = {fVec};
mtkleina156a8f2015-04-03 06:16:13 -0700182 return pun.fs[k&3];
mtkleinc9adb052015-03-30 10:50:27 -0700183 }
184
185private:
186 __m128 fVec;
187};
188
189
190#endif//SkNx_sse_DEFINED