Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 1 | // Copyright 2020 Google LLC. |
Mike Klein | a67d1ae | 2020-03-09 17:36:00 -0500 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 3 | |
| 4 | #ifndef SkVM_opts_DEFINED |
| 5 | #define SkVM_opts_DEFINED |
| 6 | |
| 7 | #include "include/private/SkVx.h" |
| 8 | #include "src/core/SkVM.h" |
| 9 | |
Mike Klein | 6d94b65 | 2020-09-16 11:37:03 -0500 | [diff] [blame] | 10 | // Ideally this is (x*y + 0x2000)>>14, |
| 11 | // but to let use vpmulhrsw we'll approximate that as ((x*y + 0x4000)>>15)<<1. |
| 12 | template <int N> |
| 13 | static inline skvx::Vec<N,int16_t> mul_q14(const skvx::Vec<N,int16_t>& x, |
| 14 | const skvx::Vec<N,int16_t>& y) { |
| 15 | #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2 |
| 16 | if constexpr (N == 16) { |
| 17 | return skvx::bit_pun<skvx::Vec<N,int16_t>>(_mm256_mulhrs_epi16(skvx::bit_pun<__m256i>(x), |
| 18 | skvx::bit_pun<__m256i>(y))) |
| 19 | << 1; |
| 20 | } |
| 21 | #endif |
| 22 | #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
| 23 | if constexpr (N == 8) { |
| 24 | return skvx::bit_pun<skvx::Vec<N,int16_t>>(_mm_mulhrs_epi16(skvx::bit_pun<__m128i>(x), |
| 25 | skvx::bit_pun<__m128i>(y))) |
| 26 | << 1; |
| 27 | } |
| 28 | #endif |
| 29 | // TODO: NEON specialization with vqrdmulh.s16? |
| 30 | |
| 31 | // Try to recurse onto the specializations above. |
| 32 | if constexpr (N > 8) { |
| 33 | return join(mul_q14(x.lo, y.lo), |
| 34 | mul_q14(x.hi, y.hi)); |
| 35 | } |
| 36 | return skvx::cast<int16_t>((skvx::cast<int>(x) * |
| 37 | skvx::cast<int>(y) + 0x4000)>>15 ) <<1; |
| 38 | } |
| 39 | |
Mike Klein | 2e69a13 | 2020-09-18 08:02:46 -0500 | [diff] [blame] | 40 | template <int N> |
| 41 | static inline skvx::Vec<N,int> gather32(const int* ptr, const skvx::Vec<N,int>& ix) { |
| 42 | #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2 |
| 43 | if constexpr (N == 8) { |
| 44 | return skvx::bit_pun<skvx::Vec<N,int>>( |
| 45 | _mm256_i32gather_epi32(ptr, skvx::bit_pun<__m256i>(ix), 4)); |
| 46 | } |
| 47 | #endif |
| 48 | // Try to recurse on specializations, falling back on standard scalar map()-based impl. |
| 49 | if constexpr (N > 8) { |
| 50 | return join(gather32(ptr, ix.lo), |
| 51 | gather32(ptr, ix.hi)); |
| 52 | } |
| 53 | return map(ix, [&](int i) { return ptr[i]; }); |
| 54 | } |
| 55 | |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 56 | namespace SK_OPTS_NS { |
| 57 | |
| 58 | inline void interpret_skvm(const skvm::InterpreterInstruction insts[], const int ninsts, |
| 59 | const int nregs, const int loop, |
| 60 | const int strides[], const int nargs, |
| 61 | int n, void* args[]) { |
| 62 | using namespace skvm; |
| 63 | |
| 64 | // We'll operate in SIMT style, knocking off K-size chunks from n while possible. |
Mike Klein | 51d35ed | 2020-04-24 08:16:22 -0500 | [diff] [blame] | 65 | #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2 |
Mike Klein | 394a6d5 | 2020-09-18 14:04:19 -0500 | [diff] [blame] | 66 | constexpr int K = 32; // 1024-bit: 4 ymm or 2 zmm at a time |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 67 | #else |
Mike Klein | 394a6d5 | 2020-09-18 14:04:19 -0500 | [diff] [blame] | 68 | constexpr int K = 8; // 256-bit: 2 xmm, 2 v-registers, etc. |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 69 | #endif |
| 70 | using I32 = skvx::Vec<K, int>; |
Mike Klein | 6b72d3a | 2020-09-24 11:17:22 -0500 | [diff] [blame^] | 71 | using I16 = skvx::Vec<K, int16_t>; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 72 | using F32 = skvx::Vec<K, float>; |
Mike Klein | 6732da0 | 2020-07-16 13:03:18 -0500 | [diff] [blame] | 73 | using U64 = skvx::Vec<K, uint64_t>; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 74 | using U32 = skvx::Vec<K, uint32_t>; |
| 75 | using U16 = skvx::Vec<K, uint16_t>; |
| 76 | using U8 = skvx::Vec<K, uint8_t>; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 77 | union Slot { |
| 78 | F32 f32; |
| 79 | I32 i32; |
| 80 | U32 u32; |
Mike Klein | 6b72d3a | 2020-09-24 11:17:22 -0500 | [diff] [blame^] | 81 | I16 i16; |
| 82 | U16 u16; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 83 | }; |
| 84 | |
| 85 | Slot few_regs[16]; |
| 86 | std::unique_ptr<char[]> many_regs; |
| 87 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 88 | Slot* r = few_regs; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 89 | |
| 90 | if (nregs > (int)SK_ARRAY_COUNT(few_regs)) { |
| 91 | // Annoyingly we can't trust that malloc() or new will work with Slot because |
| 92 | // the skvx::Vec types may have alignment greater than what they provide. |
| 93 | // We'll overallocate one extra register so we can align manually. |
| 94 | many_regs.reset(new char[ sizeof(Slot) * (nregs + 1) ]); |
| 95 | |
| 96 | uintptr_t addr = (uintptr_t)many_regs.get(); |
| 97 | addr += alignof(Slot) - |
| 98 | (addr & (alignof(Slot) - 1)); |
| 99 | SkASSERT((addr & (alignof(Slot) - 1)) == 0); |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 100 | r = (Slot*)addr; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 101 | } |
| 102 | |
| 103 | |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 104 | // Step each argument pointer ahead by its stride a number of times. |
| 105 | auto step_args = [&](int times) { |
| 106 | for (int i = 0; i < nargs; i++) { |
| 107 | args[i] = (void*)( (char*)args[i] + times * strides[i] ); |
| 108 | } |
| 109 | }; |
| 110 | |
| 111 | int start = 0, |
| 112 | stride; |
| 113 | for ( ; n > 0; start = loop, n -= stride, step_args(stride)) { |
| 114 | stride = n >= K ? K : 1; |
| 115 | |
| 116 | for (int i = start; i < ninsts; i++) { |
| 117 | InterpreterInstruction inst = insts[i]; |
| 118 | |
| 119 | // d = op(x,y/imm,z/imm) |
| 120 | Reg d = inst.d, |
| 121 | x = inst.x, |
| 122 | y = inst.y, |
| 123 | z = inst.z; |
| 124 | int immy = inst.immy, |
| 125 | immz = inst.immz; |
| 126 | |
| 127 | // Ops that interact with memory need to know whether we're stride=1 or K, |
| 128 | // but all non-memory ops can run the same code no matter the stride. |
| 129 | switch (2*(int)inst.op + (stride == K ? 1 : 0)) { |
| 130 | default: SkUNREACHABLE; |
| 131 | |
| 132 | #define STRIDE_1(op) case 2*(int)op |
| 133 | #define STRIDE_K(op) case 2*(int)op + 1 |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 134 | STRIDE_1(Op::store8 ): memcpy(args[immy], &r[x].i32, 1); break; |
| 135 | STRIDE_1(Op::store16): memcpy(args[immy], &r[x].i32, 2); break; |
| 136 | STRIDE_1(Op::store32): memcpy(args[immy], &r[x].i32, 4); break; |
Mike Klein | 6732da0 | 2020-07-16 13:03:18 -0500 | [diff] [blame] | 137 | STRIDE_1(Op::store64): memcpy((char*)args[immz]+0, &r[x].i32, 4); |
| 138 | memcpy((char*)args[immz]+4, &r[y].i32, 4); break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 139 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 140 | STRIDE_K(Op::store8 ): skvx::cast<uint8_t> (r[x].i32).store(args[immy]); break; |
| 141 | STRIDE_K(Op::store16): skvx::cast<uint16_t>(r[x].i32).store(args[immy]); break; |
| 142 | STRIDE_K(Op::store32): (r[x].i32).store(args[immy]); break; |
Mike Klein | 6732da0 | 2020-07-16 13:03:18 -0500 | [diff] [blame] | 143 | STRIDE_K(Op::store64): (skvx::cast<uint64_t>(r[x].u32) << 0 | |
| 144 | skvx::cast<uint64_t>(r[y].u32) << 32).store(args[immz]); |
| 145 | break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 146 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 147 | STRIDE_1(Op::load8 ): r[d].i32 = 0; memcpy(&r[d].i32, args[immy], 1); break; |
| 148 | STRIDE_1(Op::load16): r[d].i32 = 0; memcpy(&r[d].i32, args[immy], 2); break; |
| 149 | STRIDE_1(Op::load32): r[d].i32 = 0; memcpy(&r[d].i32, args[immy], 4); break; |
Mike Klein | 3136789 | 2020-07-30 08:19:12 -0500 | [diff] [blame] | 150 | STRIDE_1(Op::load64): |
| 151 | r[d].i32 = 0; memcpy(&r[d].i32, (char*)args[immy] + 4*immz, 4); break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 152 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 153 | STRIDE_K(Op::load8 ): r[d].i32= skvx::cast<int>(U8 ::Load(args[immy])); break; |
| 154 | STRIDE_K(Op::load16): r[d].i32= skvx::cast<int>(U16::Load(args[immy])); break; |
| 155 | STRIDE_K(Op::load32): r[d].i32= I32::Load(args[immy]) ; break; |
Mike Klein | 3136789 | 2020-07-30 08:19:12 -0500 | [diff] [blame] | 156 | STRIDE_K(Op::load64): |
| 157 | // Low 32 bits if immz=0, or high 32 bits if immz=1. |
| 158 | r[d].i32 = skvx::cast<int>(U64::Load(args[immy]) >> (32*immz)); break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 159 | |
| 160 | // The pointer we base our gather on is loaded indirectly from a uniform: |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 161 | // - args[immy] is the uniform holding our gather base pointer somewhere; |
| 162 | // - (const uint8_t*)args[immy] + immz points to the gather base pointer; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 163 | // - memcpy() loads the gather base and into a pointer of the right type. |
| 164 | // After all that we have an ordinary (uniform) pointer `ptr` to load from, |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 165 | // and we then gather from it using the varying indices in r[x]. |
Mike Klein | feb4d10 | 2020-09-17 08:54:08 -0500 | [diff] [blame] | 166 | STRIDE_1(Op::gather8): { |
| 167 | const uint8_t* ptr; |
| 168 | memcpy(&ptr, (const uint8_t*)args[immy] + immz, sizeof(ptr)); |
| 169 | r[d].i32 = ptr[ r[x].i32[0] ]; |
| 170 | } break; |
| 171 | STRIDE_1(Op::gather16): { |
| 172 | const uint16_t* ptr; |
| 173 | memcpy(&ptr, (const uint8_t*)args[immy] + immz, sizeof(ptr)); |
| 174 | r[d].i32 = ptr[ r[x].i32[0] ]; |
| 175 | } break; |
| 176 | STRIDE_1(Op::gather32): { |
| 177 | const int* ptr; |
| 178 | memcpy(&ptr, (const uint8_t*)args[immy] + immz, sizeof(ptr)); |
| 179 | r[d].i32 = ptr[ r[x].i32[0] ]; |
| 180 | } break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 181 | |
Mike Klein | feb4d10 | 2020-09-17 08:54:08 -0500 | [diff] [blame] | 182 | STRIDE_K(Op::gather8): { |
| 183 | const uint8_t* ptr; |
| 184 | memcpy(&ptr, (const uint8_t*)args[immy] + immz, sizeof(ptr)); |
| 185 | r[d].i32 = map(r[x].i32, [&](int ix) { return (int)ptr[ix]; }); |
| 186 | } break; |
| 187 | STRIDE_K(Op::gather16): { |
| 188 | const uint16_t* ptr; |
| 189 | memcpy(&ptr, (const uint8_t*)args[immy] + immz, sizeof(ptr)); |
| 190 | r[d].i32 = map(r[x].i32, [&](int ix) { return (int)ptr[ix]; }); |
| 191 | } break; |
| 192 | STRIDE_K(Op::gather32): { |
| 193 | const int* ptr; |
| 194 | memcpy(&ptr, (const uint8_t*)args[immy] + immz, sizeof(ptr)); |
Mike Klein | 2e69a13 | 2020-09-18 08:02:46 -0500 | [diff] [blame] | 195 | r[d].i32 = gather32(ptr, r[x].i32); |
Mike Klein | feb4d10 | 2020-09-17 08:54:08 -0500 | [diff] [blame] | 196 | } break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 197 | |
| 198 | #undef STRIDE_1 |
| 199 | #undef STRIDE_K |
| 200 | |
| 201 | // Ops that don't interact with memory should never care about the stride. |
| 202 | #define CASE(op) case 2*(int)op: /*fallthrough*/ case 2*(int)op+1 |
| 203 | |
Mike Klein | 89b3c1f | 2020-07-29 16:45:05 -0500 | [diff] [blame] | 204 | // These 128-bit ops are implemented serially for simplicity. |
Mike Klein | 3136789 | 2020-07-30 08:19:12 -0500 | [diff] [blame] | 205 | CASE(Op::store128): { |
| 206 | int ptr = immz>>1, |
| 207 | lane = immz&1; |
Mike Klein | 89b3c1f | 2020-07-29 16:45:05 -0500 | [diff] [blame] | 208 | U64 src = (skvx::cast<uint64_t>(r[x].u32) << 0 | |
| 209 | skvx::cast<uint64_t>(r[y].u32) << 32); |
| 210 | for (int i = 0; i < stride; i++) { |
Mike Klein | 3136789 | 2020-07-30 08:19:12 -0500 | [diff] [blame] | 211 | memcpy((char*)args[ptr] + 16*i + 8*lane, &src[i], 8); |
Mike Klein | 89b3c1f | 2020-07-29 16:45:05 -0500 | [diff] [blame] | 212 | } |
| 213 | } break; |
| 214 | |
Mike Klein | 3136789 | 2020-07-30 08:19:12 -0500 | [diff] [blame] | 215 | CASE(Op::load128): |
Mike Klein | 89b3c1f | 2020-07-29 16:45:05 -0500 | [diff] [blame] | 216 | r[d].i32 = 0; |
| 217 | for (int i = 0; i < stride; i++) { |
| 218 | memcpy(&r[d].i32[i], (const char*)args[immy] + 16*i+ 4*immz, 4); |
| 219 | } break; |
| 220 | |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 221 | CASE(Op::assert_true): |
| 222 | #ifdef SK_DEBUG |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 223 | if (!all(r[x].i32)) { |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 224 | SkDebugf("inst %d, register %d\n", i, y); |
| 225 | for (int i = 0; i < K; i++) { |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 226 | SkDebugf("\t%2d: %08x (%g)\n", i, r[y].i32[i], r[y].f32[i]); |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 227 | } |
Mike Klein | 51a7f95 | 2020-09-16 16:00:33 -0500 | [diff] [blame] | 228 | SkASSERT(false); |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 229 | } |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 230 | #endif |
| 231 | break; |
| 232 | |
| 233 | CASE(Op::index): { |
| 234 | const int iota[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, |
Mike Klein | 394a6d5 | 2020-09-18 14:04:19 -0500 | [diff] [blame] | 235 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, |
| 236 | 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, |
| 237 | 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 }; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 238 | static_assert(K <= SK_ARRAY_COUNT(iota), ""); |
| 239 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 240 | r[d].i32 = n - I32::Load(iota); |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 241 | } break; |
| 242 | |
| 243 | CASE(Op::uniform8): |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 244 | r[d].i32 = *(const uint8_t* )( (const char*)args[immy] + immz ); |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 245 | break; |
| 246 | CASE(Op::uniform16): |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 247 | r[d].i32 = *(const uint16_t*)( (const char*)args[immy] + immz ); |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 248 | break; |
| 249 | CASE(Op::uniform32): |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 250 | r[d].i32 = *(const int* )( (const char*)args[immy] + immz ); |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 251 | break; |
| 252 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 253 | CASE(Op::splat): r[d].i32 = immy; break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 254 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 255 | CASE(Op::add_f32): r[d].f32 = r[x].f32 + r[y].f32; break; |
| 256 | CASE(Op::sub_f32): r[d].f32 = r[x].f32 - r[y].f32; break; |
| 257 | CASE(Op::mul_f32): r[d].f32 = r[x].f32 * r[y].f32; break; |
| 258 | CASE(Op::div_f32): r[d].f32 = r[x].f32 / r[y].f32; break; |
| 259 | CASE(Op::min_f32): r[d].f32 = min(r[x].f32, r[y].f32); break; |
| 260 | CASE(Op::max_f32): r[d].f32 = max(r[x].f32, r[y].f32); break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 261 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 262 | CASE(Op::fma_f32): r[d].f32 = fma( r[x].f32, r[y].f32, r[z].f32); break; |
| 263 | CASE(Op::fms_f32): r[d].f32 = fma( r[x].f32, r[y].f32, -r[z].f32); break; |
| 264 | CASE(Op::fnma_f32): r[d].f32 = fma(-r[x].f32, r[y].f32, r[z].f32); break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 265 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 266 | CASE(Op::sqrt_f32): r[d].f32 = sqrt(r[x].f32); break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 267 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 268 | CASE(Op::add_i32): r[d].i32 = r[x].i32 + r[y].i32; break; |
| 269 | CASE(Op::sub_i32): r[d].i32 = r[x].i32 - r[y].i32; break; |
| 270 | CASE(Op::mul_i32): r[d].i32 = r[x].i32 * r[y].i32; break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 271 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 272 | CASE(Op::shl_i32): r[d].i32 = r[x].i32 << immy; break; |
| 273 | CASE(Op::sra_i32): r[d].i32 = r[x].i32 >> immy; break; |
| 274 | CASE(Op::shr_i32): r[d].u32 = r[x].u32 >> immy; break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 275 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 276 | CASE(Op:: eq_f32): r[d].i32 = r[x].f32 == r[y].f32; break; |
| 277 | CASE(Op::neq_f32): r[d].i32 = r[x].f32 != r[y].f32; break; |
| 278 | CASE(Op:: gt_f32): r[d].i32 = r[x].f32 > r[y].f32; break; |
| 279 | CASE(Op::gte_f32): r[d].i32 = r[x].f32 >= r[y].f32; break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 280 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 281 | CASE(Op:: eq_i32): r[d].i32 = r[x].i32 == r[y].i32; break; |
| 282 | CASE(Op:: gt_i32): r[d].i32 = r[x].i32 > r[y].i32; break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 283 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 284 | CASE(Op::bit_and ): r[d].i32 = r[x].i32 & r[y].i32; break; |
| 285 | CASE(Op::bit_or ): r[d].i32 = r[x].i32 | r[y].i32; break; |
| 286 | CASE(Op::bit_xor ): r[d].i32 = r[x].i32 ^ r[y].i32; break; |
| 287 | CASE(Op::bit_clear): r[d].i32 = r[x].i32 & ~r[y].i32; break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 288 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 289 | CASE(Op::select): r[d].i32 = skvx::if_then_else(r[x].i32, r[y].i32, r[z].i32); |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 290 | break; |
| 291 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 292 | CASE(Op::pack): r[d].u32 = r[x].u32 | (r[y].u32 << immz); break; |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 293 | |
Mike Klein | 4284f75 | 2020-07-10 15:16:17 -0500 | [diff] [blame] | 294 | CASE(Op::ceil): r[d].f32 = skvx::ceil(r[x].f32) ; break; |
| 295 | CASE(Op::floor): r[d].f32 = skvx::floor(r[x].f32) ; break; |
| 296 | CASE(Op::to_f32): r[d].f32 = skvx::cast<float>( r[x].i32 ); break; |
| 297 | CASE(Op::trunc): r[d].i32 = skvx::cast<int> ( r[x].f32 ); break; |
| 298 | CASE(Op::round): r[d].i32 = skvx::cast<int> (skvx::lrint(r[x].f32)); break; |
Mike Klein | 4d680cd | 2020-07-15 09:58:51 -0500 | [diff] [blame] | 299 | |
| 300 | CASE(Op::to_half): |
| 301 | r[d].i32 = skvx::cast<int>(skvx::to_half(r[x].f32)); |
| 302 | break; |
| 303 | CASE(Op::from_half): |
| 304 | r[d].f32 = skvx::from_half(skvx::cast<uint16_t>(r[x].i32)); |
| 305 | break; |
Mike Klein | 98c512c | 2020-09-15 10:00:27 -0500 | [diff] [blame] | 306 | |
Mike Klein | 6b72d3a | 2020-09-24 11:17:22 -0500 | [diff] [blame^] | 307 | CASE(Op::splat_q14): r[d].i16 = immy; break; |
Mike Klein | 9791e50 | 2020-09-15 12:43:38 -0500 | [diff] [blame] | 308 | |
Mike Klein | 6b72d3a | 2020-09-24 11:17:22 -0500 | [diff] [blame^] | 309 | CASE(Op::add_q14): r[d].i16 = r[x].i16 + r[y].i16; break; |
| 310 | CASE(Op::sub_q14): r[d].i16 = r[x].i16 - r[y].i16; break; |
| 311 | CASE(Op::mul_q14): r[d].i16 = mul_q14(r[x].i16, r[y].i16); break; |
Mike Klein | 9791e50 | 2020-09-15 12:43:38 -0500 | [diff] [blame] | 312 | |
Mike Klein | 6b72d3a | 2020-09-24 11:17:22 -0500 | [diff] [blame^] | 313 | CASE(Op::shl_q14): r[d].i16 = r[x].i16 << immy; break; |
| 314 | CASE(Op::sra_q14): r[d].i16 = r[x].i16 >> immy; break; |
| 315 | CASE(Op::shr_q14): r[d].u16 = r[x].u16 >> immy; break; |
Mike Klein | 9791e50 | 2020-09-15 12:43:38 -0500 | [diff] [blame] | 316 | |
Mike Klein | 6b72d3a | 2020-09-24 11:17:22 -0500 | [diff] [blame^] | 317 | CASE(Op::eq_q14): r[d].i16 = r[x].i16 == r[y].i16; break; |
| 318 | CASE(Op::gt_q14): r[d].i16 = r[x].i16 > r[y].i16; break; |
| 319 | |
| 320 | CASE(Op::min_q14): r[d].i16 = min(r[x].i16, r[y].i16); break; |
| 321 | CASE(Op::max_q14): r[d].i16 = max(r[x].i16, r[y].i16); break; |
| 322 | |
| 323 | CASE(Op::bit_and_q14): r[d].i16 = r[x].i16 & r[y].i16; break; |
| 324 | CASE(Op::bit_or_q14 ): r[d].i16 = r[x].i16 | r[y].i16; break; |
| 325 | CASE(Op::bit_xor_q14): r[d].i16 = r[x].i16 ^ r[y].i16; break; |
| 326 | CASE(Op::bit_clear_q14): r[d].i16 = r[x].i16 & ~r[y].i16; break; |
| 327 | |
| 328 | CASE(Op::select_q14): |
| 329 | r[d].i16 = skvx::if_then_else(r[x].i16, r[y].i16, r[z].i16); |
| 330 | break; |
Mike Klein | 9791e50 | 2020-09-15 12:43:38 -0500 | [diff] [blame] | 331 | |
Mike Klein | 7b1620f | 2020-09-16 10:18:47 -0500 | [diff] [blame] | 332 | // Happily, Clang can see through this one and generates perfect code |
| 333 | // using vpavgw without any help from us! |
Mike Klein | 6b72d3a | 2020-09-24 11:17:22 -0500 | [diff] [blame^] | 334 | CASE(Op::uavg_q14): |
| 335 | r[d].u16 = skvx::cast<uint16_t>( (skvx::cast<int>(r[x].u16) + |
| 336 | skvx::cast<int>(r[y].u16) + 1)>>1 ); |
Mike Klein | 9791e50 | 2020-09-15 12:43:38 -0500 | [diff] [blame] | 337 | break; |
Mike Klein | 6b72d3a | 2020-09-24 11:17:22 -0500 | [diff] [blame^] | 338 | |
| 339 | CASE(Op::to_q14): r[d].i16 = skvx::cast<int16_t>(r[x].i32); break; |
| 340 | CASE(Op::from_q14): r[d].i32 = skvx::cast<int32_t>(r[x].i16); break; |
| 341 | |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 342 | #undef CASE |
| 343 | } |
| 344 | } |
| 345 | } |
| 346 | } |
| 347 | |
John Stiles | a6841be | 2020-08-06 14:11:56 -0400 | [diff] [blame] | 348 | } // namespace SK_OPTS_NS |
Mike Klein | ec37097 | 2020-03-05 10:15:35 -0600 | [diff] [blame] | 349 | |
| 350 | #endif//SkVM_opts_DEFINED |