Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2017 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #include "SkCpu.h" |
| 9 | #include "SkOpts.h" |
| 10 | #include "SkRasterPipeline.h" |
| 11 | #include "SkStream.h" |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 12 | #if defined(_MSC_VER) |
| 13 | #include <windows.h> |
| 14 | #else |
| 15 | #include <sys/mman.h> |
| 16 | #endif |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 17 | |
| 18 | #include "SkSplicer_generated.h" |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame^] | 19 | #include "SkSplicer_generated_lowp.h" |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 20 | #include "SkSplicer_shared.h" |
| 21 | |
Mike Klein | 13ccda4 | 2017-01-10 14:09:24 -0500 | [diff] [blame] | 22 | // Uncomment to dump output JIT'd pipeline. |
| 23 | //#define DUMP "/tmp/dump.bin" |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame^] | 24 | //#define DUMP "/data/local/tmp/dump.bin" |
Mike Klein | 13ccda4 | 2017-01-10 14:09:24 -0500 | [diff] [blame] | 25 | // |
| 26 | // On x86, we'll include IACA markers too. |
| 27 | // https://software.intel.com/en-us/articles/intel-architecture-code-analyzer |
| 28 | // Running IACA will disassemble, and more. |
| 29 | // $ ./iaca.sh -arch HSW -64 -mark 0 /tmp/dump.bin | less |
| 30 | // |
| 31 | // To disassemble an aarch64 dump, |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame^] | 32 | // $ adb pull /data/local/tmp/dump.bin; gobjdump -b binary -D dump.bin -m aarch64 | less |
Mike Klein | 4ef8cb3 | 2017-01-12 11:36:46 -0500 | [diff] [blame] | 33 | // |
| 34 | // To disassemble an armv7 dump, |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame^] | 35 | // $ adb pull /data/local/tmp/dump.bin; gobjdump -b binary -D dump.bin -m arm | less |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 36 | |
| 37 | namespace { |
| 38 | |
// Stages expect these constants to be set to these values.
// It's fine to rearrange and add new ones if you update SkSplicer_constants.
// (The JIT'd code receives a pointer to this struct as its 4th argument "k".)
static const SkSplicer_constants kConstants = {
    0x000000ff, 1.0f, 255.0f, 1/255.0f,
    0.0025f, 0.6975f, 0.3000f, 1/12.92f, 0.055f,       // from_srgb
    12.46f, 0.411192f, 0.689206f, -0.0988f, 0.0043f,   // to_srgb
};
// Same idea for the lowp (16-bit fixed point) stages.
static const SkSplicer_constants_lowp kConstants_lowp = {
    0x0001, 0x8000,
};
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 49 | |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 50 | // We do this a lot, so it's nice to infer the correct size. Works fine with arrays. |
| 51 | template <typename T> |
Mike Klein | 8e619a2 | 2017-01-09 17:21:32 -0500 | [diff] [blame] | 52 | static void splice(SkWStream* buf, const T& val) { |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame^] | 53 | // This null check makes determining whether we can drop to lowp easier. |
| 54 | // It's always known at compile time.. |
| 55 | if (buf) { |
| 56 | buf->write(&val, sizeof(val)); |
| 57 | } |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 58 | } |
| 59 | |
#if defined(__aarch64__)
    // aarch64: 4 pixels per loop iteration (one 128-bit NEON register of floats).
    static constexpr int kStride = 4;

    // Load the 64-bit ctx pointer into x2 (Stage argument 3, "ctx"),
    // built up 16 bits at a time with a movz + three movk instructions.
    static void set_ctx(SkWStream* buf, void* ctx) {
        uint16_t parts[4];
        memcpy(parts, &ctx, 8);
        splice(buf, 0xd2f00000 | (parts[3] << 5) | 0x2);  // move  16-bit intermediate << 48 into x2
        splice(buf, 0xf2c00000 | (parts[2] << 5) | 0x2);  // merge 16-bit intermediate << 32 into x2
        splice(buf, 0xf2a00000 | (parts[1] << 5) | 0x2);  // merge 16-bit intermediate << 16 into x2
        splice(buf, 0xf2800000 | (parts[0] << 5) | 0x2);  // merge 16-bit intermediate <<  0 into x2
    }

    // Emit the loop's backward conditional branch to loop_start.
    static void loop(SkWStream* buf, int loop_start) {
        splice(buf, 0xeb01001f);  // cmp x0, x1
        int off = loop_start - (int)buf->bytesWritten();
        off /= 4;                    // bytes -> instructions, still signed
        off = (off & 0x7ffff) << 5;  // 19 bit maximum range (+- 256K instructions)
        splice(buf, 0x54000003 | off);  // b.cc loop_start (cc == "carry clear", unsigned less than)
    }

    static void ret(SkWStream* buf) {
        splice(buf, 0xd65f03c0);  // ret
    }
#elif defined(__ARM_NEON__)
    // ARMv7 NEON: 2 pixels per loop iteration (64-bit d-registers).
    static constexpr int kStride = 2;

    // Load the 32-bit ctx pointer into r2 with a mov/movt pair.
    static void set_ctx(SkWStream* buf, void* ctx) {
        uint16_t parts[2];
        // A32 mov/movt scatter the 16-bit immediate: top nibble into bits
        // 19:16, bottom 12 bits in place.
        auto encode = [](uint16_t part) -> uint32_t {
            return (part & 0xf000) << 4 | (part & 0xfff);
        };
        memcpy(parts, &ctx, 4);
        splice(buf, 0xe3002000 | encode(parts[0]));  // mov  r2, <bottom 16 bits>
        splice(buf, 0xe3402000 | encode(parts[1]));  // movt r2, <top 16 bits>
    }

    static void loop(SkWStream* buf, int loop_start) {
        splice(buf, 0xe1500001);  // cmp r0, r1
        // A32 branch offsets are relative to PC, which reads as the current
        // instruction address + 8.
        int off = loop_start - ((int)buf->bytesWritten() + 8 /*ARM is weird*/);
        off /= 4;  // bytes -> instructions, still signed
        off = (off & 0x00ffffff);  // 24-bit signed immediate
        splice(buf, 0x3a000000 | off);  // bcc loop_start
    }

    static void ret(SkWStream* buf) {
        splice(buf, 0xe12fff1e);  // bx lr
    }
#else
    // x86-64 (Haswell+): 8 pixels per loop iteration (256-bit ymm registers).
    static constexpr int kStride = 8;

    // Load the 64-bit ctx pointer into %rdx (System V argument 3, "ctx").
    static void set_ctx(SkWStream* buf, void* ctx) {
        static const uint8_t movabsq_rdx[] = { 0x48, 0xba };
        splice(buf, movabsq_rdx);  // movabsq <next 8 bytes>, %rdx
        splice(buf, ctx);          // the pointer itself is the immediate
    }

    static void loop(SkWStream* buf, int loop_start) {
        static const uint8_t cmp_rsi_rdi[] = { 0x48, 0x39, 0xf7 };
        static const uint8_t jb_near[]     = { 0x0f, 0x8c };
        splice(buf, cmp_rsi_rdi);  // cmp %rsi, %rdi
        splice(buf, jb_near);      // jb <next 4 bytes> (b == "before", unsigned less than)
        // rel32 displacement is relative to the end of the jump instruction,
        // i.e. the 4 bytes we're about to write.
        splice(buf, loop_start - (int)(buf->bytesWritten() + 4));
    }

    static void ret(SkWStream* buf) {
        static const uint8_t vzeroupper[] = { 0xc5, 0xf8, 0x77 };
        static const uint8_t ret[]        = { 0xc3 };
        splice(buf, vzeroupper);  // Avoid AVX->SSE transition penalties in callers.
        splice(buf, ret);
    }
#endif
| 122 | |
#if defined(_MSC_VER)
    // Adapt from MS ABI to System V ABI used by stages:
    // save the callee-saved registers that differ (rsi, rdi, xmm6-15),
    // then shuffle the four argument registers into System V order.
    static void before_loop(SkWStream* buf) {
        static const uint8_t ms_to_system_v[] = {
            0x56,                                         // push   %rsi
            0x57,                                         // push   %rdi
            0x48,0x81,0xec,0xa8,0x00,0x00,0x00,           // sub    $0xa8,%rsp
            0xc5,0x78,0x29,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps %xmm15,0x90(%rsp)
            0xc5,0x78,0x29,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps %xmm14,0x80(%rsp)
            0xc5,0x78,0x29,0x6c,0x24,0x70,                // vmovaps %xmm13,0x70(%rsp)
            0xc5,0x78,0x29,0x64,0x24,0x60,                // vmovaps %xmm12,0x60(%rsp)
            0xc5,0x78,0x29,0x5c,0x24,0x50,                // vmovaps %xmm11,0x50(%rsp)
            0xc5,0x78,0x29,0x54,0x24,0x40,                // vmovaps %xmm10,0x40(%rsp)
            0xc5,0x78,0x29,0x4c,0x24,0x30,                // vmovaps %xmm9,0x30(%rsp)
            0xc5,0x78,0x29,0x44,0x24,0x20,                // vmovaps %xmm8,0x20(%rsp)
            0xc5,0xf8,0x29,0x7c,0x24,0x10,                // vmovaps %xmm7,0x10(%rsp)
            0xc5,0xf8,0x29,0x34,0x24,                     // vmovaps %xmm6,(%rsp)
            0x48,0x89,0xcf,                               // mov    %rcx,%rdi
            0x48,0x89,0xd6,                               // mov    %rdx,%rsi
            0x4c,0x89,0xc2,                               // mov    %r8,%rdx
            0x4c,0x89,0xc9,                               // mov    %r9,%rcx
        };
        splice(buf, ms_to_system_v);
    }
    // Mirror image of before_loop(): restore xmm6-15, rsp, rdi, rsi.
    static void after_loop(SkWStream* buf) {
        static const uint8_t system_v_to_ms[] = {
            0xc5,0xf8,0x28,0x34,0x24,                     // vmovaps (%rsp),%xmm6
            0xc5,0xf8,0x28,0x7c,0x24,0x10,                // vmovaps 0x10(%rsp),%xmm7
            0xc5,0x78,0x28,0x44,0x24,0x20,                // vmovaps 0x20(%rsp),%xmm8
            0xc5,0x78,0x28,0x4c,0x24,0x30,                // vmovaps 0x30(%rsp),%xmm9
            0xc5,0x78,0x28,0x54,0x24,0x40,                // vmovaps 0x40(%rsp),%xmm10
            0xc5,0x78,0x28,0x5c,0x24,0x50,                // vmovaps 0x50(%rsp),%xmm11
            0xc5,0x78,0x28,0x64,0x24,0x60,                // vmovaps 0x60(%rsp),%xmm12
            0xc5,0x78,0x28,0x6c,0x24,0x70,                // vmovaps 0x70(%rsp),%xmm13
            0xc5,0x78,0x28,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps 0x80(%rsp),%xmm14
            0xc5,0x78,0x28,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps 0x90(%rsp),%xmm15
            0x48,0x81,0xc4,0xa8,0x00,0x00,0x00,           // add    $0xa8,%rsp
            0x5f,                                         // pop    %rdi
            0x5e,                                         // pop    %rsi
        };
        splice(buf, system_v_to_ms);
    }
#elif !defined(__aarch64__) && !defined(__ARM_NEON__) && defined(DUMP)
    // IACA start and end markers, so the dumped code can be fed straight
    // to Intel's Architecture Code Analyzer (see comment at top of file).
    static const uint8_t ud2[]      = { 0x0f, 0x0b };        // undefined... crashes when run
    static const uint8_t nop3[]     = { 0x64, 0x67, 0x90 };  // 3 byte no-op
    static const uint8_t movl_ebx[] = { 0xbb };              // move next 4 bytes into ebx

    static void before_loop(SkWStream* buf) {
        // IACA's start marker: ud2; movl $111, %ebx; nop3.
        splice(buf, ud2);
        splice(buf, movl_ebx);
        splice(buf, 111);
        splice(buf, nop3);
    }
    static void after_loop(SkWStream* buf) {
        // IACA's end marker: movl $222, %ebx; nop3; ud2.
        splice(buf, movl_ebx);
        splice(buf, 222);
        splice(buf, nop3);
        splice(buf, ud2);
    }
#else
    // No ABI shim or markers needed: do nothing.
    static void before_loop(SkWStream*) {}
    static void after_loop (SkWStream*) {}
#endif
| 187 | |
// We can only mprotect / VirtualProtect at 4K page granularity.
//
// Returns the smallest multiple of 4096 that is >= len (0 stays 0).
static size_t round_up_to_full_pages(size_t len) {
    // 4096 is a power of two, so round up with a mask instead of a loop:
    // add (page size - 1), then clear the low 12 bits.
    const size_t kPageMask = 4096 - 1;
    return (len + kPageMask) & ~kPageMask;
}
| 196 | |
| 197 | #if defined(_MSC_VER) |
| 198 | // Copy len bytes from src to memory that's executable. cleanup with cleanup_executable_mem(). |
| 199 | static void* copy_to_executable_mem(const void* src, size_t* len) { |
| 200 | if (!src || !*len) { |
| 201 | return nullptr; |
| 202 | } |
| 203 | |
| 204 | size_t alloc = round_up_to_full_pages(*len); |
| 205 | |
| 206 | auto fn = VirtualAlloc(nullptr, alloc, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); |
| 207 | memcpy(fn, src, *len); |
| 208 | |
| 209 | DWORD dont_care; |
| 210 | VirtualProtect(fn, alloc, PAGE_EXECUTE_READ, &dont_care); |
| 211 | |
| 212 | *len = alloc; |
| 213 | return fn; |
| 214 | } |
| 215 | static void cleanup_executable_mem(void* fn, size_t len) { |
| 216 | if (fn) { |
| 217 | VirtualFree(fn, 0, MEM_RELEASE); |
| 218 | } |
| 219 | } |
| 220 | #else |
| 221 | static void* copy_to_executable_mem(const void* src, size_t* len) { |
| 222 | if (!src || !*len) { |
| 223 | return nullptr; |
| 224 | } |
| 225 | |
| 226 | size_t alloc = round_up_to_full_pages(*len); |
| 227 | |
| 228 | auto fn = mmap(nullptr, alloc, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); |
| 229 | memcpy(fn, src, *len); |
| 230 | |
| 231 | mprotect(fn, alloc, PROT_READ|PROT_EXEC); |
| 232 | __builtin___clear_cache((char*)fn, (char*)fn + *len); // Essential on ARM; no-op on x86. |
| 233 | |
| 234 | *len = alloc; |
| 235 | return fn; |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 236 | } |
| 237 | static void cleanup_executable_mem(void* fn, size_t len) { |
| 238 | if (fn) { |
| 239 | munmap(fn, len); |
| 240 | } |
| 241 | } |
Mike Klein | 09326e7 | 2017-01-11 13:41:30 -0500 | [diff] [blame] | 242 | #endif |
Mike Klein | a708026 | 2017-01-09 10:20:13 -0500 | [diff] [blame] | 243 | |
Mike Klein | f720098 | 2017-01-15 18:14:07 -0500 | [diff] [blame^] | 244 | static bool splice_lowp(SkWStream* buf, SkRasterPipeline::StockStage st) { |
| 245 | switch (st) { |
| 246 | default: return false; |
| 247 | case SkRasterPipeline::clamp_0: break; // lowp can't go below 0. |
| 248 | #define CASE(st) case SkRasterPipeline::st: splice(buf, kSplice_##st##_lowp); break |
| 249 | CASE(clear); |
| 250 | CASE(plus_); |
| 251 | CASE(srcover); |
| 252 | CASE(dstover); |
| 253 | CASE(clamp_1); |
| 254 | CASE(clamp_a); |
| 255 | CASE(swap); |
| 256 | CASE(move_src_dst); |
| 257 | CASE(move_dst_src); |
| 258 | CASE(premul); |
| 259 | CASE(load_8888); |
| 260 | CASE(store_8888); |
| 261 | #undef CASE |
| 262 | } |
| 263 | return true; |
| 264 | } |
| 265 | |
| 266 | static bool splice_highp(SkWStream* buf, SkRasterPipeline::StockStage st) { |
| 267 | switch (st) { |
| 268 | default: return false; |
| 269 | #define CASE(st) case SkRasterPipeline::st: splice(buf, kSplice_##st); break |
| 270 | CASE(clear); |
| 271 | CASE(plus_); |
| 272 | CASE(srcover); |
| 273 | CASE(dstover); |
| 274 | CASE(clamp_0); |
| 275 | CASE(clamp_1); |
| 276 | CASE(clamp_a); |
| 277 | CASE(swap); |
| 278 | CASE(move_src_dst); |
| 279 | CASE(move_dst_src); |
| 280 | CASE(premul); |
| 281 | CASE(unpremul); |
| 282 | CASE(from_srgb); |
| 283 | CASE(to_srgb); |
| 284 | CASE(scale_u8); |
| 285 | CASE(load_tables); |
| 286 | CASE(load_8888); |
| 287 | CASE(store_8888); |
| 288 | CASE(load_f16); |
| 289 | CASE(store_f16); |
| 290 | CASE(matrix_3x4); |
| 291 | #undef CASE |
| 292 | } |
| 293 | return true; |
| 294 | } |
| 295 | |
// A runnable pipeline: JIT'd code when we can build it, plus an interpreter
// fallback.  Stored (copied) into a std::function by SkRasterPipeline::jit().
struct Spliced {

    Spliced(const SkRasterPipeline::Stage* stages, int nstages) {
        // We always create a backup interpreter pipeline,
        //   - to handle any program we can't, and
        //   - to handle the n < stride tails.
        fBackup     = SkOpts::compile_pipeline(stages, nstages);
        fSplicedLen = 0;
        fSpliced    = nullptr;
        // If we return early anywhere in here, !fSpliced means we'll use fBackup instead.

    #if defined(__aarch64__)
        // aarch64 machines always have what the spliced code needs; no check.
    #elif defined(__ARM_NEON__)
        // Late generation ARMv7, e.g. Cortex A15 or Krait.
        if (!SkCpu::Supports(SkCpu::NEON|SkCpu::NEON_FMA|SkCpu::VFP_FP16)) {
            return;
        }
    #else
        // To keep things simple, only one x86 target supported: Haswell+ x86-64.
        if (!SkCpu::Supports(SkCpu::HSW) || sizeof(void*) != 8) {
            return;
        }
    #endif

        // See if all the stages can run in lowp mode.  If so, we can run at ~2x speed.
        // (splice_lowp() with a null buffer answers "could we?" without writing anything.)
        bool lowp = true;
        for (int i = 0; i < nstages; i++) {
            if (!splice_lowp(nullptr, stages[i].stage)) {
                //SkDebugf("SkSplicer can't yet handle stage %d in lowp.\n", stages[i].stage);
                lowp = false;
                break;
            }
        }
        fLowp = lowp;

        SkDynamicMemoryWStream buf;

        // Our loop is the equivalent of this C++ code:
        //    do {
        //        ... run spliced stages...
        //        x += stride;
        //    } while(x < limit);
        before_loop(&buf);
        auto loop_start = buf.bytesWritten();  // Think of this like a label, loop_start:

        for (int i = 0; i < nstages; i++) {
            // If a stage has a context pointer, load it into rdx/x2, Stage argument 3 "ctx".
            if (stages[i].ctx) {
                set_ctx(&buf, stages[i].ctx);
            }

            // Splice in the code for the Stages, generated offline into SkSplicer_generated.h.
            if (lowp) {
                // We already probed every stage above, so lowp splicing can't fail here.
                SkAssertResult(splice_lowp(&buf, stages[i].stage));
                continue;
            }
            if (!splice_highp(&buf, stages[i].stage)) {
                //SkDebugf("SkSplicer can't yet handle stage %d.\n", stages[i].stage);
                return;
            }
        }

        // Advance x by one stride's worth of pixels, then loop if not done.
        lowp ? splice(&buf, kSplice_inc_x_lowp)
             : splice(&buf, kSplice_inc_x);
        loop(&buf, loop_start);  // Loop back to handle more pixels if not done.
        after_loop(&buf);
        ret(&buf);  // We're done.

        auto data = buf.detachAsData();
        fSplicedLen = data->size();
        fSpliced    = copy_to_executable_mem(data->data(), &fSplicedLen);

    #if defined(DUMP)
        SkFILEWStream(DUMP).write(data->data(), data->size());
    #endif
    }

    // Spliced is stored in a std::function, so it needs to be copyable.
    // Members initialize in declaration order, so fSplicedLen is already set
    // when it's passed by address to copy_to_executable_mem().
    Spliced(const Spliced& o) : fBackup    (o.fBackup)
                              , fSplicedLen(o.fSplicedLen)
                              , fSpliced   (copy_to_executable_mem(o.fSpliced, &fSplicedLen))
                              , fLowp      (o.fLowp) {}

    ~Spliced() {
        cleanup_executable_mem(fSpliced, fSplicedLen);
    }

    // Here's where we call fSpliced if we created it, fBackup if not.
    void operator()(size_t x, size_t y, size_t n) const {
        // lowp stages handle twice as many pixels per loop iteration.
        size_t stride = fLowp ? kStride*2
                              : kStride;
        size_t body = n/stride*stride;  // Largest multiple of stride (2, 4, 8, or 16) <= n.
        if (fSpliced && body) {         // Can we run fSpliced for at least one stride?
            // TODO: At some point we will want to pass in y...
            using Fn = void(size_t x, size_t limit, void* ctx, const void* k);
            auto k = fLowp ? (const void*)&kConstants_lowp
                           : (const void*)&kConstants;
            ((Fn*)fSpliced)(x, x+body, nullptr, k);

            // Fall through to fBackup for any n<stride last pixels.
            x += body;
            n -= body;
        }
        fBackup(x,y,n);
    }

    std::function<void(size_t, size_t, size_t)> fBackup;      // Interpreter pipeline fallback.
    size_t                                      fSplicedLen;  // Page-rounded size of fSpliced, in bytes.
    void*                                       fSpliced;     // Executable JIT'd code, or nullptr.
    bool                                        fLowp;        // True if spliced in 16-bit fixed point mode.
};
| 407 | |
| 408 | } |
| 409 | |
// Build a JIT'd version of this pipeline.  The returned function internally
// falls back to an interpreted pipeline when splicing isn't possible for
// this program or this CPU.
std::function<void(size_t, size_t, size_t)> SkRasterPipeline::jit() const {
    return Spliced(fStages.data(), SkToInt(fStages.size()));
}