Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2017 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #include "SkCpu.h" |
| 9 | #include "SkOpts.h" |
| 10 | #include "SkRasterPipeline.h" |
| 11 | #include "SkStream.h" |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 12 | #if defined(_MSC_VER) |
| 13 | #include <windows.h> |
| 14 | #else |
| 15 | #include <sys/mman.h> |
| 16 | #endif |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 17 | |
| 18 | #include "SkSplicer_generated.h" |
| 19 | #include "SkSplicer_shared.h" |
| 20 | |
Mike Klein | 13ccda4 | 2017-01-10 14:09:24 -0500 | [diff] [blame] | 21 | // Uncomment to dump output JIT'd pipeline. |
| 22 | //#define DUMP "/tmp/dump.bin" |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 23 | //#define DUMP "/data/local/tmp/dump.bin" |
Mike Klein | 13ccda4 | 2017-01-10 14:09:24 -0500 | [diff] [blame] | 24 | // |
| 25 | // On x86, we'll include IACA markers too. |
| 26 | // https://software.intel.com/en-us/articles/intel-architecture-code-analyzer |
| 27 | // Running IACA will disassemble, and more. |
| 28 | // $ ./iaca.sh -arch HSW -64 -mark 0 /tmp/dump.bin | less |
| 29 | // |
| 30 | // To disassemble an aarch64 dump, |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 31 | // $ adb pull /data/local/tmp/dump.bin; gobjdump -b binary -D dump.bin -m aarch64 | less |
Mike Klein | 4ef8cb3 | 2017-01-12 11:36:46 -0500 | [diff] [blame] | 32 | // |
| 33 | // To disassemble an armv7 dump, |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 34 | // $ adb pull /data/local/tmp/dump.bin; gobjdump -b binary -D dump.bin -m arm | less |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 35 | |
| 36 | namespace { |
| 37 | |
| 38 | // Stages expect these constants to be set to these values. |
| 39 | // It's fine to rearrange and add new ones if you update SkSplicer_constants. |
| 40 | static const SkSplicer_constants kConstants = { |
Mike Klein | 8f297c9 | 2017-01-20 19:16:10 -0500 | [diff] [blame] | 41 | 1.0f, 255.0f, 1/255.0f, 0x000000ff, |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 42 | 0.0025f, 0.6975f, 0.3000f, 1/12.92f, 0.055f, // from_srgb |
| 43 | 12.46f, 0.411192f, 0.689206f, -0.0988f, 0.0043f, // to_srgb |
Mike Klein | 16c1496 | 2017-02-08 12:50:17 -0500 | [diff] [blame^] | 44 | 0x77800000, 0x07800000, // fp16 <-> fp32 |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 45 | }; |
| 46 | |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 47 | // We do this a lot, so it's nice to infer the correct size. Works fine with arrays. |
| 48 | template <typename T> |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 49 | static void splice(SkWStream* buf, const T& val) { |
Mike Klein | a13b248 | 2017-02-07 10:11:52 -0500 | [diff] [blame] | 50 | buf->write(&val, sizeof(val)); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 51 | } |
| 52 | |
Mike Klein | a960095 | 2017-02-07 14:32:25 -0500 | [diff] [blame] | 53 | // Splice up to (but not including) the final return instruction in code. |
| 54 | template <typename T, size_t N> |
| 55 | static void splice_until_ret(SkWStream* buf, const T (&code)[N]) { |
| 56 | // On all platforms we splice today, return is a single T (byte on x86, u32 on ARM). |
| 57 | buf->write(&code, sizeof(T) * (N-1)); |
| 58 | } |
| 59 | |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 60 | #if defined(__aarch64__) |
| 61 | static constexpr int kStride = 4; |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 62 | static void set_ctx(SkWStream* buf, void* ctx) { |
| 63 | uint16_t parts[4]; |
| 64 | memcpy(parts, &ctx, 8); |
| 65 | splice(buf, 0xd2f00000 | (parts[3] << 5) | 0x2); // move 16-bit intermediate << 48 into x2 |
| 66 | splice(buf, 0xf2c00000 | (parts[2] << 5) | 0x2); // merge 16-bit intermediate << 32 into x2 |
| 67 | splice(buf, 0xf2a00000 | (parts[1] << 5) | 0x2); // merge 16-bit intermediate << 16 into x2 |
| 68 | splice(buf, 0xf2800000 | (parts[0] << 5) | 0x2); // merge 16-bit intermediate << 0 into x2 |
| 69 | } |
| 70 | static void loop(SkWStream* buf, int loop_start) { |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 71 | splice(buf, 0xeb01001f); // cmp x0, x1 |
Mike Klein | 13ccda4 | 2017-01-10 14:09:24 -0500 | [diff] [blame] | 72 | int off = loop_start - (int)buf->bytesWritten(); |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 73 | off /= 4; // bytes -> instructions, still signed |
| 74 | off = (off & 0x7ffff) << 5; // 19 bit maximum range (+- 256K instructions) |
| 75 | splice(buf, 0x54000003 | off); // b.cc loop_start (cc == "carry clear", unsigned less than) |
| 76 | } |
| 77 | static void ret(SkWStream* buf) { |
| 78 | splice(buf, 0xd65f03c0); // ret |
| 79 | } |
Mike Klein | 4ef8cb3 | 2017-01-12 11:36:46 -0500 | [diff] [blame] | 80 | #elif defined(__ARM_NEON__) |
| 81 | static constexpr int kStride = 2; |
| 82 | static void set_ctx(SkWStream* buf, void* ctx) { |
| 83 | uint16_t parts[2]; |
| 84 | auto encode = [](uint16_t part) -> uint32_t { |
| 85 | return (part & 0xf000) << 4 | (part & 0xfff); |
| 86 | }; |
| 87 | memcpy(parts, &ctx, 4); |
| 88 | splice(buf, 0xe3002000 | encode(parts[0])); // mov r2, <bottom 16 bits> |
| 89 | splice(buf, 0xe3402000 | encode(parts[1])); // movt r2, <top 16 bits> |
| 90 | } |
| 91 | static void loop(SkWStream* buf, int loop_start) { |
Mike Klein | 4ef8cb3 | 2017-01-12 11:36:46 -0500 | [diff] [blame] | 92 | splice(buf, 0xe1500001); // cmp r0, r1 |
| 93 | int off = loop_start - ((int)buf->bytesWritten() + 8 /*ARM is weird*/); |
| 94 | off /= 4; // bytes -> instructions, still signed |
| 95 | off = (off & 0x00ffffff); |
| 96 | splice(buf, 0x3a000000 | off); // bcc loop_start |
| 97 | } |
| 98 | static void ret(SkWStream* buf) { |
| 99 | splice(buf, 0xe12fff1e); // bx lr |
| 100 | } |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 101 | #else |
| 102 | static constexpr int kStride = 8; |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 103 | static void set_ctx(SkWStream* buf, void* ctx) { |
| 104 | static const uint8_t movabsq_rdx[] = { 0x48, 0xba }; |
| 105 | splice(buf, movabsq_rdx); // movabsq <next 8 bytes>, %rdx |
| 106 | splice(buf, ctx); |
| 107 | } |
| 108 | static void loop(SkWStream* buf, int loop_start) { |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 109 | static const uint8_t cmp_rsi_rdi[] = { 0x48, 0x39, 0xf7 }; |
| 110 | static const uint8_t jb_near[] = { 0x0f, 0x8c }; |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 111 | splice(buf, cmp_rsi_rdi); // cmp %rsi, %rdi |
| 112 | splice(buf, jb_near); // jb <next 4 bytes> (b == "before", unsigned less than) |
| 113 | splice(buf, loop_start - (int)(buf->bytesWritten() + 4)); |
| 114 | } |
| 115 | static void ret(SkWStream* buf) { |
| 116 | static const uint8_t vzeroupper[] = { 0xc5, 0xf8, 0x77 }; |
| 117 | static const uint8_t ret[] = { 0xc3 }; |
| 118 | splice(buf, vzeroupper); |
| 119 | splice(buf, ret); |
| 120 | } |
| 121 | #endif |
| 122 | |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 123 | #if defined(_MSC_VER) |
| 124 | // Adapt from MS ABI to System V ABI used by stages. |
| 125 | static void before_loop(SkWStream* buf) { |
| 126 | static const uint8_t ms_to_system_v[] = { |
| 127 | 0x56, // push %rsi |
| 128 | 0x57, // push %rdi |
| 129 | 0x48,0x81,0xec,0xa8,0x00,0x00,0x00, // sub $0xa8,%rsp |
| 130 | 0xc5,0x78,0x29,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps %xmm15,0x90(%rsp) |
| 131 | 0xc5,0x78,0x29,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps %xmm14,0x80(%rsp) |
| 132 | 0xc5,0x78,0x29,0x6c,0x24,0x70, // vmovaps %xmm13,0x70(%rsp) |
| 133 | 0xc5,0x78,0x29,0x64,0x24,0x60, // vmovaps %xmm12,0x60(%rsp) |
| 134 | 0xc5,0x78,0x29,0x5c,0x24,0x50, // vmovaps %xmm11,0x50(%rsp) |
| 135 | 0xc5,0x78,0x29,0x54,0x24,0x40, // vmovaps %xmm10,0x40(%rsp) |
| 136 | 0xc5,0x78,0x29,0x4c,0x24,0x30, // vmovaps %xmm9,0x30(%rsp) |
| 137 | 0xc5,0x78,0x29,0x44,0x24,0x20, // vmovaps %xmm8,0x20(%rsp) |
| 138 | 0xc5,0xf8,0x29,0x7c,0x24,0x10, // vmovaps %xmm7,0x10(%rsp) |
| 139 | 0xc5,0xf8,0x29,0x34,0x24, // vmovaps %xmm6,(%rsp) |
| 140 | 0x48,0x89,0xcf, // mov %rcx,%rdi |
| 141 | 0x48,0x89,0xd6, // mov %rdx,%rsi |
| 142 | 0x4c,0x89,0xc2, // mov %r8,%rdx |
| 143 | 0x4c,0x89,0xc9, // mov %r9,%rcx |
| 144 | }; |
| 145 | splice(buf, ms_to_system_v); |
| 146 | } |
| 147 | static void after_loop(SkWStream* buf) { |
| 148 | static const uint8_t system_v_to_ms[] = { |
| 149 | 0xc5,0xf8,0x28,0x34,0x24, // vmovaps (%rsp),%xmm6 |
| 150 | 0xc5,0xf8,0x28,0x7c,0x24,0x10, // vmovaps 0x10(%rsp),%xmm7 |
| 151 | 0xc5,0x78,0x28,0x44,0x24,0x20, // vmovaps 0x20(%rsp),%xmm8 |
| 152 | 0xc5,0x78,0x28,0x4c,0x24,0x30, // vmovaps 0x30(%rsp),%xmm9 |
| 153 | 0xc5,0x78,0x28,0x54,0x24,0x40, // vmovaps 0x40(%rsp),%xmm10 |
| 154 | 0xc5,0x78,0x28,0x5c,0x24,0x50, // vmovaps 0x50(%rsp),%xmm11 |
| 155 | 0xc5,0x78,0x28,0x64,0x24,0x60, // vmovaps 0x60(%rsp),%xmm12 |
| 156 | 0xc5,0x78,0x28,0x6c,0x24,0x70, // vmovaps 0x70(%rsp),%xmm13 |
| 157 | 0xc5,0x78,0x28,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps 0x80(%rsp),%xmm14 |
| 158 | 0xc5,0x78,0x28,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps 0x90(%rsp),%xmm15 |
| 159 | 0x48,0x81,0xc4,0xa8,0x00,0x00,0x00, // add $0xa8,%rsp |
| 160 | 0x5f, // pop %rdi |
| 161 | 0x5e, // pop %rsi |
| 162 | }; |
| 163 | splice(buf, system_v_to_ms); |
| 164 | } |
Mike Klein | 4ef8cb3 | 2017-01-12 11:36:46 -0500 | [diff] [blame] | 165 | #elif !defined(__aarch64__) && !defined(__ARM_NEON__) && defined(DUMP) |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 166 | // IACA start and end markers. |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 167 | static const uint8_t ud2[] = { 0x0f, 0x0b }; // undefined... crashes when run |
| 168 | static const uint8_t nop3[] = { 0x64, 0x67, 0x90 }; // 3 byte no-op |
| 169 | static const uint8_t movl_ebx[] = { 0xbb }; // move next 4 bytes into ebx |
| 170 | |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 171 | static void before_loop(SkWStream* buf) { |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 172 | splice(buf, ud2); |
| 173 | splice(buf, movl_ebx); |
| 174 | splice(buf, 111); |
| 175 | splice(buf, nop3); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 176 | } |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 177 | static void after_loop(SkWStream* buf) { |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 178 | splice(buf, movl_ebx); |
| 179 | splice(buf, 222); |
| 180 | splice(buf, nop3); |
| 181 | splice(buf, ud2); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 182 | } |
| 183 | #else |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 184 | static void before_loop(SkWStream*) {} |
| 185 | static void after_loop (SkWStream*) {} |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 186 | #endif |
| 187 | |
// We can only mprotect / VirtualProtect at 4K page granularity.
// Returns the smallest multiple of 4096 that is >= len (so 0 stays 0).
// Computed directly rather than by counting up a page at a time; lengths within
// 4095 bytes of SIZE_MAX cannot be rounded up and are not supported.
static size_t round_up_to_full_pages(size_t len) {
    const size_t kPage = 4096;
    const size_t partial = len % kPage;   // bytes spilling into a final, partly used page
    return partial ? len + (kPage - partial) : len;
}
| 196 | |
| 197 | #if defined(_MSC_VER) |
// Copy *len bytes from src to freshly allocated memory that's executable.
// On success returns the new memory and updates *len to the page-rounded allocation size;
// cleanup with cleanup_executable_mem().  Returns nullptr, leaving *len untouched, if src is
// null, *len is zero, or the allocation or protection change fails.
static void* copy_to_executable_mem(const void* src, size_t* len) {
    if (!src || !*len) {
        return nullptr;
    }

    size_t alloc = round_up_to_full_pages(*len);

    // Allocate writable first so we can copy the code in.
    void* fn = VirtualAlloc(nullptr, alloc, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
    if (!fn) {
        return nullptr;  // Don't memcpy into a failed allocation.
    }
    memcpy(fn, src, *len);

    // Then flip the pages to read+execute.  If that fails the memory is useless to us.
    DWORD dont_care;
    if (!VirtualProtect(fn, alloc, PAGE_EXECUTE_READ, &dont_care)) {
        VirtualFree(fn, 0, MEM_RELEASE);
        return nullptr;
    }

    *len = alloc;
    return fn;
}
// Release memory obtained from copy_to_executable_mem().  A null fn is ignored.
// len is not needed here: the whole region is released in one go.
static void cleanup_executable_mem(void* fn, size_t len) {
    if (!fn) {
        return;
    }
    VirtualFree(fn, 0, MEM_RELEASE);
}
| 220 | #else |
| 221 | static void* copy_to_executable_mem(const void* src, size_t* len) { |
| 222 | if (!src || !*len) { |
| 223 | return nullptr; |
| 224 | } |
| 225 | |
| 226 | size_t alloc = round_up_to_full_pages(*len); |
| 227 | |
| 228 | auto fn = mmap(nullptr, alloc, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); |
| 229 | memcpy(fn, src, *len); |
| 230 | |
| 231 | mprotect(fn, alloc, PROT_READ|PROT_EXEC); |
| 232 | __builtin___clear_cache((char*)fn, (char*)fn + *len); // Essential on ARM; no-op on x86. |
| 233 | |
| 234 | *len = alloc; |
| 235 | return fn; |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 236 | } |
// Unmap len bytes at fn, as obtained from copy_to_executable_mem().  A null fn is ignored.
static void cleanup_executable_mem(void* fn, size_t len) {
    if (!fn) {
        return;
    }
    munmap(fn, len);
}
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 242 | #endif |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 243 | |
Mike Klein | a13b248 | 2017-02-07 10:11:52 -0500 | [diff] [blame] | 244 | static bool splice(SkWStream* buf, SkRasterPipeline::StockStage st) { |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 245 | switch (st) { |
| 246 | default: return false; |
Mike Klein | a960095 | 2017-02-07 14:32:25 -0500 | [diff] [blame] | 247 | #define CASE(st) case SkRasterPipeline::st: splice_until_ret(buf, kSplice_##st); break |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 248 | CASE(clear); |
| 249 | CASE(plus_); |
| 250 | CASE(srcover); |
| 251 | CASE(dstover); |
| 252 | CASE(clamp_0); |
| 253 | CASE(clamp_1); |
| 254 | CASE(clamp_a); |
| 255 | CASE(swap); |
| 256 | CASE(move_src_dst); |
| 257 | CASE(move_dst_src); |
| 258 | CASE(premul); |
| 259 | CASE(unpremul); |
| 260 | CASE(from_srgb); |
| 261 | CASE(to_srgb); |
| 262 | CASE(scale_u8); |
| 263 | CASE(load_tables); |
| 264 | CASE(load_8888); |
| 265 | CASE(store_8888); |
| 266 | CASE(load_f16); |
| 267 | CASE(store_f16); |
| 268 | CASE(matrix_3x4); |
| 269 | #undef CASE |
| 270 | } |
| 271 | return true; |
| 272 | } |
| 273 | |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 274 | struct Spliced { |
| 275 | |
| 276 | Spliced(const SkRasterPipeline::Stage* stages, int nstages) { |
| 277 | // We always create a backup interpreter pipeline, |
| 278 | // - to handle any program we can't, and |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 279 | // - to handle the n < stride tails. |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 280 | fBackup = SkOpts::compile_pipeline(stages, nstages); |
| 281 | fSplicedLen = 0; |
| 282 | fSpliced = nullptr; |
| 283 | // If we return early anywhere in here, !fSpliced means we'll use fBackup instead. |
| 284 | |
Mike Klein | 4ef8cb3 | 2017-01-12 11:36:46 -0500 | [diff] [blame] | 285 | #if defined(__aarch64__) |
| 286 | #elif defined(__ARM_NEON__) |
| 287 | // Late generation ARMv7, e.g. Cortex A15 or Krait. |
| 288 | if (!SkCpu::Supports(SkCpu::NEON|SkCpu::NEON_FMA|SkCpu::VFP_FP16)) { |
| 289 | return; |
| 290 | } |
| 291 | #else |
| 292 | // To keep things simple, only one x86 target supported: Haswell+ x86-64. |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 293 | if (!SkCpu::Supports(SkCpu::HSW) || sizeof(void*) != 8) { |
| 294 | return; |
| 295 | } |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 296 | #endif |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 297 | |
| 298 | SkDynamicMemoryWStream buf; |
| 299 | |
Mike Klein | 7ba89a1 | 2017-01-10 13:42:51 -0500 | [diff] [blame] | 300 | // Our loop is the equivalent of this C++ code: |
| 301 | // do { |
| 302 | // ... run spliced stages... |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 303 | // x += stride; |
Mike Klein | 7ba89a1 | 2017-01-10 13:42:51 -0500 | [diff] [blame] | 304 | // } while(x < limit); |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 305 | before_loop(&buf); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 306 | auto loop_start = buf.bytesWritten(); // Think of this like a label, loop_start: |
| 307 | |
| 308 | for (int i = 0; i < nstages; i++) { |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 309 | // If a stage has a context pointer, load it into rdx/x2, Stage argument 3 "ctx". |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 310 | if (stages[i].ctx) { |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 311 | set_ctx(&buf, stages[i].ctx); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 312 | } |
| 313 | |
| 314 | // Splice in the code for the Stages, generated offline into SkSplicer_generated.h. |
Mike Klein | a13b248 | 2017-02-07 10:11:52 -0500 | [diff] [blame] | 315 | if (!splice(&buf, stages[i].stage)) { |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 316 | //SkDebugf("SkSplicer can't yet handle stage %d.\n", stages[i].stage); |
| 317 | return; |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 318 | } |
| 319 | } |
| 320 | |
Mike Klein | a960095 | 2017-02-07 14:32:25 -0500 | [diff] [blame] | 321 | splice_until_ret(&buf, kSplice_inc_x); |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 322 | loop(&buf, loop_start); // Loop back to handle more pixels if not done. |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 323 | after_loop(&buf); |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 324 | ret(&buf); // We're done. |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 325 | |
| 326 | auto data = buf.detachAsData(); |
| 327 | fSplicedLen = data->size(); |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 328 | fSpliced = copy_to_executable_mem(data->data(), &fSplicedLen); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 329 | |
Mike Klein | 13ccda4 | 2017-01-10 14:09:24 -0500 | [diff] [blame] | 330 | #if defined(DUMP) |
| 331 | SkFILEWStream(DUMP).write(data->data(), data->size()); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 332 | #endif |
| 333 | } |
| 334 | |
| 335 | // Spliced is stored in a std::function, so it needs to be copyable. |
| 336 | Spliced(const Spliced& o) : fBackup (o.fBackup) |
| 337 | , fSplicedLen(o.fSplicedLen) |
Mike Klein | a13b248 | 2017-02-07 10:11:52 -0500 | [diff] [blame] | 338 | , fSpliced (copy_to_executable_mem(o.fSpliced, &fSplicedLen)) {} |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 339 | |
| 340 | ~Spliced() { |
| 341 | cleanup_executable_mem(fSpliced, fSplicedLen); |
| 342 | } |
| 343 | |
| 344 | // Here's where we call fSpliced if we created it, fBackup if not. |
Mike Klein | 319ba3d | 2017-01-20 15:11:54 -0500 | [diff] [blame] | 345 | void operator()(size_t x, size_t n) const { |
Mike Klein | a13b248 | 2017-02-07 10:11:52 -0500 | [diff] [blame] | 346 | size_t body = n/kStride*kStride; // Largest multiple of kStride (2, 4, 8, or 16) <= n. |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 347 | if (fSpliced && body) { // Can we run fSpliced for at least one stride? |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 348 | using Fn = void(size_t x, size_t limit, void* ctx, const void* k); |
Mike Klein | a13b248 | 2017-02-07 10:11:52 -0500 | [diff] [blame] | 349 | ((Fn*)fSpliced)(x, x+body, nullptr, &kConstants); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 350 | |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 351 | // Fall through to fBackup for any n<stride last pixels. |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 352 | x += body; |
| 353 | n -= body; |
| 354 | } |
Mike Klein | 319ba3d | 2017-01-20 15:11:54 -0500 | [diff] [blame] | 355 | fBackup(x,n); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 356 | } |
| 357 | |
Mike Klein | 319ba3d | 2017-01-20 15:11:54 -0500 | [diff] [blame] | 358 | std::function<void(size_t, size_t)> fBackup; |
| 359 | size_t fSplicedLen; |
| 360 | void* fSpliced; |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 361 | }; |
| 362 | |
| 363 | } |
| 364 | |
Mike Klein | 319ba3d | 2017-01-20 15:11:54 -0500 | [diff] [blame] | 365 | std::function<void(size_t, size_t)> SkRasterPipeline::jit() const { |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 366 | return Spliced(fStages.data(), SkToInt(fStages.size())); |
| 367 | } |