Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2017 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #include "SkCpu.h" |
| 9 | #include "SkOpts.h" |
| 10 | #include "SkRasterPipeline.h" |
| 11 | #include "SkStream.h" |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 12 | #if defined(_MSC_VER) |
| 13 | #include <windows.h> |
| 14 | #else |
| 15 | #include <sys/mman.h> |
| 16 | #endif |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 17 | |
| 18 | #include "SkSplicer_generated.h" |
| 19 | #include "SkSplicer_shared.h" |
| 20 | |
Mike Klein | 13ccda4 | 2017-01-10 14:09:24 -0500 | [diff] [blame] | 21 | // Uncomment to dump output JIT'd pipeline. |
| 22 | //#define DUMP "/tmp/dump.bin" |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 23 | //#define DUMP "/data/local/tmp/dump.bin" |
Mike Klein | 13ccda4 | 2017-01-10 14:09:24 -0500 | [diff] [blame] | 24 | // |
| 25 | // On x86, we'll include IACA markers too. |
| 26 | // https://software.intel.com/en-us/articles/intel-architecture-code-analyzer |
| 27 | // Running IACA will disassemble, and more. |
| 28 | // $ ./iaca.sh -arch HSW -64 -mark 0 /tmp/dump.bin | less |
| 29 | // |
| 30 | // To disassemble an aarch64 dump, |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 31 | // $ adb pull /data/local/tmp/dump.bin; gobjdump -b binary -D dump.bin -m aarch64 | less |
Mike Klein | 4ef8cb3 | 2017-01-12 11:36:46 -0500 | [diff] [blame] | 32 | // |
| 33 | // To disassemble an armv7 dump, |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 34 | // $ adb pull /data/local/tmp/dump.bin; gobjdump -b binary -D dump.bin -m arm | less |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 35 | |
| 36 | namespace { |
| 37 | |
// Stages expect these constants to be set to these values.
// It's fine to rearrange and add new ones if you update SkSplicer_constants.
// This table is handed to the spliced code as its final argument 'k'
// (see Spliced::operator() below), so field order must match SkSplicer_shared.h.
static const SkSplicer_constants kConstants = {
    1.0f, 255.0f, 1/255.0f, 0x000000ff,
    0.0025f, 0.6975f, 0.3000f, 1/12.92f, 0.055f,       // from_srgb
    12.46f, 0.411192f, 0.689206f, -0.0988f, 0.0043f,   // to_srgb
};
| 45 | |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 46 | // We do this a lot, so it's nice to infer the correct size. Works fine with arrays. |
| 47 | template <typename T> |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 48 | static void splice(SkWStream* buf, const T& val) { |
Mike Klein | a13b248 | 2017-02-07 10:11:52 -0500 | [diff] [blame^] | 49 | buf->write(&val, sizeof(val)); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 50 | } |
| 51 | |
#if defined(__aarch64__)
    // aarch64: each loop iteration handles kStride pixels.
    static constexpr int kStride = 4;
    // Load the 64-bit ctx pointer into x2 (Stage argument 3) using four
    // 16-bit move/merge instructions, one per quarter of the pointer.
    static void set_ctx(SkWStream* buf, void* ctx) {
        uint16_t parts[4];
        memcpy(parts, &ctx, 8);
        splice(buf, 0xd2f00000 | (parts[3] << 5) | 0x2);  // move 16-bit intermediate << 48 into x2
        splice(buf, 0xf2c00000 | (parts[2] << 5) | 0x2);  // merge 16-bit intermediate << 32 into x2
        splice(buf, 0xf2a00000 | (parts[1] << 5) | 0x2);  // merge 16-bit intermediate << 16 into x2
        splice(buf, 0xf2800000 | (parts[0] << 5) | 0x2);  // merge 16-bit intermediate <<  0 into x2
    }
    // Compare x (x0) against limit (x1) and branch back to loop_start if x < limit.
    static void loop(SkWStream* buf, int loop_start) {
        splice(buf, 0xeb01001f);        // cmp x0, x1
        int off = loop_start - (int)buf->bytesWritten();  // backward displacement in bytes (negative)
        off /= 4;                       // bytes -> instructions, still signed
        off = (off & 0x7ffff) << 5;     // 19 bit maximum range (+- 256K instructions)
        splice(buf, 0x54000003 | off);  // b.cc loop_start  (cc == "carry clear", unsigned less than)
    }
    static void ret(SkWStream* buf) {
        splice(buf, 0xd65f03c0);        // ret
    }
#elif defined(__ARM_NEON__)
    // armv7 + NEON: each loop iteration handles kStride pixels.
    static constexpr int kStride = 2;
    // Load the 32-bit ctx pointer into r2 (Stage argument 3) with a movw/movt pair.
    static void set_ctx(SkWStream* buf, void* ctx) {
        uint16_t parts[2];
        // Scatter a 16-bit immediate into the instruction's split imm4:imm12 fields.
        auto encode = [](uint16_t part) -> uint32_t {
            return (part & 0xf000) << 4 | (part & 0xfff);
        };
        memcpy(parts, &ctx, 4);
        splice(buf, 0xe3002000 | encode(parts[0]));  // mov  r2, <bottom 16 bits>
        splice(buf, 0xe3402000 | encode(parts[1]));  // movt r2, <top 16 bits>
    }
    // Compare x (r0) against limit (r1) and branch back to loop_start if x < limit.
    static void loop(SkWStream* buf, int loop_start) {
        splice(buf, 0xe1500001);        // cmp r0, r1
        // ARM branch offsets are relative to PC, which reads as current instruction + 8.
        int off = loop_start - ((int)buf->bytesWritten() + 8 /*ARM is weird*/);
        off /= 4;                       // bytes -> instructions, still signed
        off = (off & 0x00ffffff);       // 24-bit signed displacement field
        splice(buf, 0x3a000000 | off);  // bcc loop_start
    }
    static void ret(SkWStream* buf) {
        splice(buf, 0xe12fff1e);        // bx lr
    }
#else
    // x86-64 (Haswell+, see the SkCpu check below): each loop iteration handles kStride pixels.
    static constexpr int kStride = 8;
    // Load the 64-bit ctx pointer into rdx (Stage argument 3).
    static void set_ctx(SkWStream* buf, void* ctx) {
        static const uint8_t movabsq_rdx[] = { 0x48, 0xba };
        splice(buf, movabsq_rdx);       // movabsq <next 8 bytes>, %rdx
        splice(buf, ctx);
    }
    // Compare x (rdi) against limit (rsi) and jump back to loop_start if x < limit.
    static void loop(SkWStream* buf, int loop_start) {
        static const uint8_t cmp_rsi_rdi[] = { 0x48, 0x39, 0xf7 };
        static const uint8_t jb_near[]     = { 0x0f, 0x8c };
        splice(buf, cmp_rsi_rdi);       // cmp %rsi, %rdi
        splice(buf, jb_near);           // jb <next 4 bytes>  (b == "before", unsigned less than)
        // Displacement is relative to the end of the jump instruction, hence the +4.
        splice(buf, loop_start - (int)(buf->bytesWritten() + 4));
    }
    static void ret(SkWStream* buf) {
        static const uint8_t vzeroupper[] = { 0xc5, 0xf8, 0x77 };  // avoid AVX->SSE transition penalty
        static const uint8_t ret[]        = { 0xc3 };
        splice(buf, vzeroupper);
        splice(buf, ret);
    }
#endif
| 114 | |
#if defined(_MSC_VER)
    // Adapt from MS ABI to System V ABI used by stages.
    // MS x64 treats xmm6-xmm15 as callee-saved, so save them before handing
    // control to the System V-style stage code, and shuffle the first four
    // integer arguments from rcx/rdx/r8/r9 into rdi/rsi/rdx/rcx.
    static void before_loop(SkWStream* buf) {
        static const uint8_t ms_to_system_v[] = {
            0x56,                                         // push   %rsi
            0x57,                                         // push   %rdi
            0x48,0x81,0xec,0xa8,0x00,0x00,0x00,           // sub    $0xa8,%rsp
            0xc5,0x78,0x29,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps %xmm15,0x90(%rsp)
            0xc5,0x78,0x29,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps %xmm14,0x80(%rsp)
            0xc5,0x78,0x29,0x6c,0x24,0x70,                // vmovaps %xmm13,0x70(%rsp)
            0xc5,0x78,0x29,0x64,0x24,0x60,                // vmovaps %xmm12,0x60(%rsp)
            0xc5,0x78,0x29,0x5c,0x24,0x50,                // vmovaps %xmm11,0x50(%rsp)
            0xc5,0x78,0x29,0x54,0x24,0x40,                // vmovaps %xmm10,0x40(%rsp)
            0xc5,0x78,0x29,0x4c,0x24,0x30,                // vmovaps %xmm9,0x30(%rsp)
            0xc5,0x78,0x29,0x44,0x24,0x20,                // vmovaps %xmm8,0x20(%rsp)
            0xc5,0xf8,0x29,0x7c,0x24,0x10,                // vmovaps %xmm7,0x10(%rsp)
            0xc5,0xf8,0x29,0x34,0x24,                     // vmovaps %xmm6,(%rsp)
            0x48,0x89,0xcf,                               // mov    %rcx,%rdi
            0x48,0x89,0xd6,                               // mov    %rdx,%rsi
            0x4c,0x89,0xc2,                               // mov    %r8,%rdx
            0x4c,0x89,0xc9,                               // mov    %r9,%rcx
        };
        splice(buf, ms_to_system_v);
    }
    // Undo before_loop(): restore xmm6-xmm15 and the saved rsi/rdi.
    static void after_loop(SkWStream* buf) {
        static const uint8_t system_v_to_ms[] = {
            0xc5,0xf8,0x28,0x34,0x24,                     // vmovaps (%rsp),%xmm6
            0xc5,0xf8,0x28,0x7c,0x24,0x10,                // vmovaps 0x10(%rsp),%xmm7
            0xc5,0x78,0x28,0x44,0x24,0x20,                // vmovaps 0x20(%rsp),%xmm8
            0xc5,0x78,0x28,0x4c,0x24,0x30,                // vmovaps 0x30(%rsp),%xmm9
            0xc5,0x78,0x28,0x54,0x24,0x40,                // vmovaps 0x40(%rsp),%xmm10
            0xc5,0x78,0x28,0x5c,0x24,0x50,                // vmovaps 0x50(%rsp),%xmm11
            0xc5,0x78,0x28,0x64,0x24,0x60,                // vmovaps 0x60(%rsp),%xmm12
            0xc5,0x78,0x28,0x6c,0x24,0x70,                // vmovaps 0x70(%rsp),%xmm13
            0xc5,0x78,0x28,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps 0x80(%rsp),%xmm14
            0xc5,0x78,0x28,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps 0x90(%rsp),%xmm15
            0x48,0x81,0xc4,0xa8,0x00,0x00,0x00,           // add    $0xa8,%rsp
            0x5f,                                         // pop    %rdi
            0x5e,                                         // pop    %rsi
        };
        splice(buf, system_v_to_ms);
    }
#elif !defined(__aarch64__) && !defined(__ARM_NEON__) && defined(DUMP)
    // IACA start and end markers, so the dumped binary can be fed to
    // Intel's code analyzer (see the DUMP notes at the top of this file).
    static const uint8_t ud2[]      = { 0x0f, 0x0b };        // undefined... crashes when run
    static const uint8_t nop3[]     = { 0x64, 0x67, 0x90 };  // 3 byte no-op
    static const uint8_t movl_ebx[] = { 0xbb };              // move next 4 bytes into ebx

    // IACA start marker: ud2, movl $111, %ebx, nop3.
    static void before_loop(SkWStream* buf) {
        splice(buf, ud2);
        splice(buf, movl_ebx);
        splice(buf, 111);
        splice(buf, nop3);
    }
    // IACA end marker: movl $222, %ebx, nop3, ud2.
    static void after_loop(SkWStream* buf) {
        splice(buf, movl_ebx);
        splice(buf, 222);
        splice(buf, nop3);
        splice(buf, ud2);
    }
#else
    // No prologue/epilogue needed on this configuration.
    static void before_loop(SkWStream*) {}
    static void after_loop (SkWStream*) {}
#endif
| 179 | |
// We can only mprotect / VirtualProtect at 4K page granularity.
// Returns len rounded up to the next multiple of 4096; 0 stays 0.
static size_t round_up_to_full_pages(size_t len) {
    const size_t kPageSize = 4096;
    return (len + kPageSize - 1) / kPageSize * kPageSize;
}

#if defined(_MSC_VER)
// Copy len bytes from src to memory that's executable.  cleanup with cleanup_executable_mem().
// On success, *len is updated to the page-rounded allocation size.
// Returns nullptr on bad input or if the OS refuses the allocation/protection;
// callers treat nullptr as "fall back to the interpreter".
static void* copy_to_executable_mem(const void* src, size_t* len) {
    if (!src || !*len) {
        return nullptr;
    }

    size_t alloc = round_up_to_full_pages(*len);

    auto fn = VirtualAlloc(nullptr, alloc, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
    if (!fn) {
        return nullptr;   // VirtualAlloc can fail; don't memcpy into nullptr.
    }
    memcpy(fn, src, *len);

    DWORD dont_care;
    if (!VirtualProtect(fn, alloc, PAGE_EXECUTE_READ, &dont_care)) {
        // If we can't make the pages executable we must not jump into them.
        VirtualFree(fn, 0, MEM_RELEASE);
        return nullptr;
    }

    *len = alloc;
    return fn;
}
static void cleanup_executable_mem(void* fn, size_t len) {
    if (fn) {
        VirtualFree(fn, 0, MEM_RELEASE);
    }
}
#else
// POSIX flavor of the above, using mmap/mprotect/munmap.
static void* copy_to_executable_mem(const void* src, size_t* len) {
    if (!src || !*len) {
        return nullptr;
    }

    size_t alloc = round_up_to_full_pages(*len);

    auto fn = mmap(nullptr, alloc, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
    if (fn == MAP_FAILED) {
        return nullptr;   // mmap reports failure as MAP_FAILED, not nullptr.
    }
    memcpy(fn, src, *len);

    if (mprotect(fn, alloc, PROT_READ|PROT_EXEC) != 0) {
        // If we can't make the pages executable we must not jump into them.
        munmap(fn, alloc);
        return nullptr;
    }
    __builtin___clear_cache((char*)fn, (char*)fn + *len);  // Essential on ARM; no-op on x86.

    *len = alloc;
    return fn;
}
static void cleanup_executable_mem(void* fn, size_t len) {
    if (fn) {
        munmap(fn, len);
    }
}
#endif
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 235 | |
// Splice in the pre-generated code (kSplice_* from SkSplicer_generated.h)
// for a single stock stage.  Returns false for any stage we can't splice yet,
// which makes the whole pipeline fall back to the interpreter (fBackup).
static bool splice(SkWStream* buf, SkRasterPipeline::StockStage st) {
    switch (st) {
        default: return false;
    #define CASE(st) case SkRasterPipeline::st: splice(buf, kSplice_##st); break
        CASE(clear);
        CASE(plus_);
        CASE(srcover);
        CASE(dstover);
        CASE(clamp_0);
        CASE(clamp_1);
        CASE(clamp_a);
        CASE(swap);
        CASE(move_src_dst);
        CASE(move_dst_src);
        CASE(premul);
        CASE(unpremul);
        CASE(from_srgb);
        CASE(to_srgb);
        CASE(scale_u8);
        CASE(load_tables);
        CASE(load_8888);
        CASE(store_8888);
        CASE(load_f16);
        CASE(store_f16);
        CASE(matrix_3x4);
    #undef CASE
    }
    return true;
}
| 265 | |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 266 | struct Spliced { |
| 267 | |
| 268 | Spliced(const SkRasterPipeline::Stage* stages, int nstages) { |
| 269 | // We always create a backup interpreter pipeline, |
| 270 | // - to handle any program we can't, and |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 271 | // - to handle the n < stride tails. |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 272 | fBackup = SkOpts::compile_pipeline(stages, nstages); |
| 273 | fSplicedLen = 0; |
| 274 | fSpliced = nullptr; |
| 275 | // If we return early anywhere in here, !fSpliced means we'll use fBackup instead. |
| 276 | |
Mike Klein | 4ef8cb3 | 2017-01-12 11:36:46 -0500 | [diff] [blame] | 277 | #if defined(__aarch64__) |
| 278 | #elif defined(__ARM_NEON__) |
| 279 | // Late generation ARMv7, e.g. Cortex A15 or Krait. |
| 280 | if (!SkCpu::Supports(SkCpu::NEON|SkCpu::NEON_FMA|SkCpu::VFP_FP16)) { |
| 281 | return; |
| 282 | } |
| 283 | #else |
| 284 | // To keep things simple, only one x86 target supported: Haswell+ x86-64. |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 285 | if (!SkCpu::Supports(SkCpu::HSW) || sizeof(void*) != 8) { |
| 286 | return; |
| 287 | } |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 288 | #endif |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 289 | |
| 290 | SkDynamicMemoryWStream buf; |
| 291 | |
Mike Klein | 7ba89a1 | 2017-01-10 13:42:51 -0500 | [diff] [blame] | 292 | // Our loop is the equivalent of this C++ code: |
| 293 | // do { |
| 294 | // ... run spliced stages... |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 295 | // x += stride; |
Mike Klein | 7ba89a1 | 2017-01-10 13:42:51 -0500 | [diff] [blame] | 296 | // } while(x < limit); |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 297 | before_loop(&buf); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 298 | auto loop_start = buf.bytesWritten(); // Think of this like a label, loop_start: |
| 299 | |
| 300 | for (int i = 0; i < nstages; i++) { |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 301 | // If a stage has a context pointer, load it into rdx/x2, Stage argument 3 "ctx". |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 302 | if (stages[i].ctx) { |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 303 | set_ctx(&buf, stages[i].ctx); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 304 | } |
| 305 | |
| 306 | // Splice in the code for the Stages, generated offline into SkSplicer_generated.h. |
Mike Klein | a13b248 | 2017-02-07 10:11:52 -0500 | [diff] [blame^] | 307 | if (!splice(&buf, stages[i].stage)) { |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 308 | //SkDebugf("SkSplicer can't yet handle stage %d.\n", stages[i].stage); |
| 309 | return; |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 310 | } |
| 311 | } |
| 312 | |
Mike Klein | a13b248 | 2017-02-07 10:11:52 -0500 | [diff] [blame^] | 313 | splice(&buf, kSplice_inc_x); |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 314 | loop(&buf, loop_start); // Loop back to handle more pixels if not done. |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 315 | after_loop(&buf); |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 316 | ret(&buf); // We're done. |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 317 | |
| 318 | auto data = buf.detachAsData(); |
| 319 | fSplicedLen = data->size(); |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 320 | fSpliced = copy_to_executable_mem(data->data(), &fSplicedLen); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 321 | |
Mike Klein | 13ccda4 | 2017-01-10 14:09:24 -0500 | [diff] [blame] | 322 | #if defined(DUMP) |
| 323 | SkFILEWStream(DUMP).write(data->data(), data->size()); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 324 | #endif |
| 325 | } |
| 326 | |
| 327 | // Spliced is stored in a std::function, so it needs to be copyable. |
| 328 | Spliced(const Spliced& o) : fBackup (o.fBackup) |
| 329 | , fSplicedLen(o.fSplicedLen) |
Mike Klein | a13b248 | 2017-02-07 10:11:52 -0500 | [diff] [blame^] | 330 | , fSpliced (copy_to_executable_mem(o.fSpliced, &fSplicedLen)) {} |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 331 | |
| 332 | ~Spliced() { |
| 333 | cleanup_executable_mem(fSpliced, fSplicedLen); |
| 334 | } |
| 335 | |
| 336 | // Here's where we call fSpliced if we created it, fBackup if not. |
Mike Klein | 319ba3d | 2017-01-20 15:11:54 -0500 | [diff] [blame] | 337 | void operator()(size_t x, size_t n) const { |
Mike Klein | a13b248 | 2017-02-07 10:11:52 -0500 | [diff] [blame^] | 338 | size_t body = n/kStride*kStride; // Largest multiple of kStride (2, 4, 8, or 16) <= n. |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 339 | if (fSpliced && body) { // Can we run fSpliced for at least one stride? |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 340 | using Fn = void(size_t x, size_t limit, void* ctx, const void* k); |
Mike Klein | a13b248 | 2017-02-07 10:11:52 -0500 | [diff] [blame^] | 341 | ((Fn*)fSpliced)(x, x+body, nullptr, &kConstants); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 342 | |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame] | 343 | // Fall through to fBackup for any n<stride last pixels. |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 344 | x += body; |
| 345 | n -= body; |
| 346 | } |
Mike Klein | 319ba3d | 2017-01-20 15:11:54 -0500 | [diff] [blame] | 347 | fBackup(x,n); |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 348 | } |
| 349 | |
Mike Klein | 319ba3d | 2017-01-20 15:11:54 -0500 | [diff] [blame] | 350 | std::function<void(size_t, size_t)> fBackup; |
| 351 | size_t fSplicedLen; |
| 352 | void* fSpliced; |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 353 | }; |
| 354 | |
| 355 | } |
| 356 | |
Mike Klein | 319ba3d | 2017-01-20 15:11:54 -0500 | [diff] [blame] | 357 | std::function<void(size_t, size_t)> SkRasterPipeline::jit() const { |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 358 | return Spliced(fStages.data(), SkToInt(fStages.size())); |
| 359 | } |