reland: We can mask load and store with just AVX

Originally reviewed here: https://skia-review.googlesource.com/c/17452/

CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-ShuttleA-GPU-GTX550Ti-x86_64-Release-Valgrind

Change-Id: I2e593e897ce93147ec593c2a5de143217274ba2a
Reviewed-on: https://skia-review.googlesource.com/18267
Reviewed-by: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 7fe5326..aebbeb4 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -10847,8 +10847,8 @@
   .byte  76,3,8                              // add           (%rax),%r9
   .byte  77,133,192                          // test          %r8,%r8
   .byte  15,133,180,0,0,0                    // jne           1345 <_sk_srcover_rgba_8888_hsw+0xcd>
-  .byte  196,193,126,111,57                  // vmovdqu       (%r9),%ymm7
-  .byte  197,197,219,37,98,59,0,0            // vpand         0x3b62(%rip),%ymm7,%ymm4        # 4e00 <_sk_callback_hsw+0x53e>
+  .byte  196,193,124,16,57                   // vmovups       (%r9),%ymm7
+  .byte  197,196,84,37,98,59,0,0             // vandps        0x3b62(%rip),%ymm7,%ymm4        # 4e00 <_sk_callback_hsw+0x53e>
   .byte  197,252,91,228                      // vcvtdq2ps     %ymm4,%ymm4
   .byte  196,226,69,0,45,117,59,0,0          // vpshufb       0x3b75(%rip),%ymm7,%ymm5        # 4e20 <_sk_callback_hsw+0x55e>
   .byte  197,252,91,237                      // vcvtdq2ps     %ymm5,%ymm5
@@ -10879,7 +10879,7 @@
   .byte  196,65,61,235,193                   // vpor          %ymm9,%ymm8,%ymm8
   .byte  77,133,192                          // test          %r8,%r8
   .byte  117,53                              // jne           136e <_sk_srcover_rgba_8888_hsw+0xf6>
-  .byte  196,65,126,127,1                    // vmovdqu       %ymm8,(%r9)
+  .byte  196,65,124,17,1                     // vmovups       %ymm8,(%r9)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,137,193                          // mov           %r8,%rcx
   .byte  255,224                             // jmpq          *%rax
@@ -10890,7 +10890,7 @@
   .byte  72,211,232                          // shr           %cl,%rax
   .byte  196,225,249,110,224                 // vmovq         %rax,%xmm4
   .byte  196,226,125,33,228                  // vpmovsxbd     %xmm4,%ymm4
-  .byte  196,194,93,140,57                   // vpmaskmovd    (%r9),%ymm4,%ymm7
+  .byte  196,194,93,44,57                    // vmaskmovps    (%r9),%ymm4,%ymm7
   .byte  233,40,255,255,255                  // jmpq          1296 <_sk_srcover_rgba_8888_hsw+0x1e>
   .byte  185,8,0,0,0                         // mov           $0x8,%ecx
   .byte  68,41,193                           // sub           %r8d,%ecx
@@ -10899,7 +10899,7 @@
   .byte  72,211,232                          // shr           %cl,%rax
   .byte  196,97,249,110,200                  // vmovq         %rax,%xmm9
   .byte  196,66,125,33,201                   // vpmovsxbd     %xmm9,%ymm9
-  .byte  196,66,53,142,1                     // vpmaskmovd    %ymm8,%ymm9,(%r9)
+  .byte  196,66,53,46,1                      // vmaskmovps    %ymm8,%ymm9,(%r9)
   .byte  235,170                             // jmp           133e <_sk_srcover_rgba_8888_hsw+0xc6>
 
 HIDDEN _sk_clamp_0_hsw
@@ -11418,8 +11418,8 @@
   .byte  76,3,8                              // add           (%rax),%r9
   .byte  77,133,192                          // test          %r8,%r8
   .byte  117,105                             // jne           1b6e <_sk_load_tables_hsw+0x7e>
-  .byte  196,193,126,111,25                  // vmovdqu       (%r9),%ymm3
-  .byte  197,229,219,13,78,51,0,0            // vpand         0x334e(%rip),%ymm3,%ymm1        # 4e60 <_sk_callback_hsw+0x59e>
+  .byte  196,193,124,16,25                   // vmovups       (%r9),%ymm3
+  .byte  197,228,84,13,78,51,0,0             // vandps        0x334e(%rip),%ymm3,%ymm1        # 4e60 <_sk_callback_hsw+0x59e>
   .byte  196,65,61,118,192                   // vpcmpeqd      %ymm8,%ymm8,%ymm8
   .byte  72,139,72,8                         // mov           0x8(%rax),%rcx
   .byte  76,139,72,16                        // mov           0x10(%rax),%r9
@@ -11445,7 +11445,7 @@
   .byte  73,211,234                          // shr           %cl,%r10
   .byte  196,193,249,110,194                 // vmovq         %r10,%xmm0
   .byte  196,226,125,33,192                  // vpmovsxbd     %xmm0,%ymm0
-  .byte  196,194,125,140,25                  // vpmaskmovd    (%r9),%ymm0,%ymm3
+  .byte  196,194,125,44,25                   // vmaskmovps    (%r9),%ymm0,%ymm3
   .byte  233,115,255,255,255                 // jmpq          1b0a <_sk_load_tables_hsw+0x1a>
 
 HIDDEN _sk_load_tables_u16_be_hsw
@@ -12970,8 +12970,8 @@
   .byte  76,3,8                              // add           (%rax),%r9
   .byte  77,133,192                          // test          %r8,%r8
   .byte  117,88                              // jne           3491 <_sk_load_8888_hsw+0x6d>
-  .byte  196,193,126,111,25                  // vmovdqu       (%r9),%ymm3
-  .byte  197,229,219,5,218,26,0,0            // vpand         0x1ada(%rip),%ymm3,%ymm0        # 4f20 <_sk_callback_hsw+0x65e>
+  .byte  196,193,124,16,25                   // vmovups       (%r9),%ymm3
+  .byte  197,228,84,5,218,26,0,0             // vandps        0x1ada(%rip),%ymm3,%ymm0        # 4f20 <_sk_callback_hsw+0x65e>
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
   .byte  196,98,125,24,5,145,24,0,0          // vbroadcastss  0x1891(%rip),%ymm8        # 4ce4 <_sk_callback_hsw+0x422>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
@@ -12994,7 +12994,7 @@
   .byte  72,211,232                          // shr           %cl,%rax
   .byte  196,225,249,110,192                 // vmovq         %rax,%xmm0
   .byte  196,226,125,33,192                  // vpmovsxbd     %xmm0,%ymm0
-  .byte  196,194,125,140,25                  // vpmaskmovd    (%r9),%ymm0,%ymm3
+  .byte  196,194,125,44,25                   // vmaskmovps    (%r9),%ymm0,%ymm3
   .byte  235,135                             // jmp           343e <_sk_load_8888_hsw+0x1a>
 
 HIDDEN _sk_gather_8888_hsw
@@ -13051,7 +13051,7 @@
   .byte  196,65,53,235,192                   // vpor          %ymm8,%ymm9,%ymm8
   .byte  77,133,192                          // test          %r8,%r8
   .byte  117,12                              // jne           35a0 <_sk_store_8888_hsw+0x73>
-  .byte  196,65,126,127,1                    // vmovdqu       %ymm8,(%r9)
+  .byte  196,65,124,17,1                     // vmovups       %ymm8,(%r9)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,137,193                          // mov           %r8,%rcx
   .byte  255,224                             // jmpq          *%rax
@@ -13062,7 +13062,7 @@
   .byte  72,211,232                          // shr           %cl,%rax
   .byte  196,97,249,110,200                  // vmovq         %rax,%xmm9
   .byte  196,66,125,33,201                   // vpmovsxbd     %xmm9,%ymm9
-  .byte  196,66,53,142,1                     // vpmaskmovd    %ymm8,%ymm9,(%r9)
+  .byte  196,66,53,46,1                      // vmaskmovps    %ymm8,%ymm9,(%r9)
   .byte  235,211                             // jmp           3599 <_sk_store_8888_hsw+0x6c>
 
 HIDDEN _sk_load_f16_hsw
@@ -15093,14 +15093,14 @@
   .byte  197,249,112,192,0                   // vpshufd       $0x0,%xmm0,%xmm0
   .byte  196,227,125,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,171,102,0,0       // vbroadcastss  0x66ab(%rip),%ymm1        # 677c <_sk_callback_avx+0x128>
+  .byte  196,226,125,24,13,211,100,0,0       // vbroadcastss  0x64d3(%rip),%ymm1        # 65a4 <_sk_callback_avx+0x128>
   .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
   .byte  197,252,88,2                        // vaddps        (%rdx),%ymm0,%ymm0
   .byte  196,226,125,24,16                   // vbroadcastss  (%rax),%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  197,236,88,201                      // vaddps        %ymm1,%ymm2,%ymm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,21,143,102,0,0       // vbroadcastss  0x668f(%rip),%ymm2        # 6780 <_sk_callback_avx+0x12c>
+  .byte  196,226,125,24,21,183,100,0,0       // vbroadcastss  0x64b7(%rip),%ymm2        # 65a8 <_sk_callback_avx+0x12c>
   .byte  197,228,87,219                      // vxorps        %ymm3,%ymm3,%ymm3
   .byte  197,220,87,228                      // vxorps        %ymm4,%ymm4,%ymm4
   .byte  197,212,87,237                      // vxorps        %ymm5,%ymm5,%ymm5
@@ -15123,7 +15123,7 @@
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  196,66,125,24,8                     // vbroadcastss  (%r8),%ymm9
   .byte  196,65,60,87,209                    // vxorps        %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,29,64,102,0,0         // vbroadcastss  0x6640(%rip),%ymm11        # 6784 <_sk_callback_avx+0x130>
+  .byte  196,98,125,24,29,104,100,0,0        // vbroadcastss  0x6468(%rip),%ymm11        # 65ac <_sk_callback_avx+0x130>
   .byte  196,65,44,84,203                    // vandps        %ymm11,%ymm10,%ymm9
   .byte  196,193,25,114,241,5                // vpslld        $0x5,%xmm9,%xmm12
   .byte  196,67,125,25,201,1                 // vextractf128  $0x1,%ymm9,%xmm9
@@ -15134,8 +15134,8 @@
   .byte  196,67,125,25,219,1                 // vextractf128  $0x1,%ymm11,%xmm11
   .byte  196,193,33,114,243,4                // vpslld        $0x4,%xmm11,%xmm11
   .byte  196,67,29,24,219,1                  // vinsertf128   $0x1,%xmm11,%ymm12,%ymm11
-  .byte  196,98,125,24,37,1,102,0,0          // vbroadcastss  0x6601(%rip),%ymm12        # 6788 <_sk_callback_avx+0x134>
-  .byte  196,98,125,24,45,252,101,0,0        // vbroadcastss  0x65fc(%rip),%ymm13        # 678c <_sk_callback_avx+0x138>
+  .byte  196,98,125,24,37,41,100,0,0         // vbroadcastss  0x6429(%rip),%ymm12        # 65b0 <_sk_callback_avx+0x134>
+  .byte  196,98,125,24,45,36,100,0,0         // vbroadcastss  0x6424(%rip),%ymm13        # 65b4 <_sk_callback_avx+0x138>
   .byte  196,65,44,84,245                    // vandps        %ymm13,%ymm10,%ymm14
   .byte  196,193,1,114,246,2                 // vpslld        $0x2,%xmm14,%xmm15
   .byte  196,67,125,25,246,1                 // vextractf128  $0x1,%ymm14,%xmm14
@@ -15162,9 +15162,9 @@
   .byte  196,65,12,86,202                    // vorps         %ymm10,%ymm14,%ymm9
   .byte  196,65,60,86,193                    // vorps         %ymm9,%ymm8,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,103,101,0,0        // vbroadcastss  0x6567(%rip),%ymm9        # 6790 <_sk_callback_avx+0x13c>
+  .byte  196,98,125,24,13,143,99,0,0         // vbroadcastss  0x638f(%rip),%ymm9        # 65b8 <_sk_callback_avx+0x13c>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,93,101,0,0         // vbroadcastss  0x655d(%rip),%ymm9        # 6794 <_sk_callback_avx+0x140>
+  .byte  196,98,125,24,13,133,99,0,0         // vbroadcastss  0x6385(%rip),%ymm9        # 65bc <_sk_callback_avx+0x140>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  196,98,125,24,72,8                  // vbroadcastss  0x8(%rax),%ymm9
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
@@ -15233,7 +15233,7 @@
 FUNCTION(_sk_srcatop_avx)
 _sk_srcatop_avx:
   .byte  197,252,89,199                      // vmulps        %ymm7,%ymm0,%ymm0
-  .byte  196,98,125,24,5,180,100,0,0         // vbroadcastss  0x64b4(%rip),%ymm8        # 6798 <_sk_callback_avx+0x144>
+  .byte  196,98,125,24,5,220,98,0,0          // vbroadcastss  0x62dc(%rip),%ymm8        # 65c0 <_sk_callback_avx+0x144>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,204                       // vmulps        %ymm4,%ymm8,%ymm9
   .byte  197,180,88,192                      // vaddps        %ymm0,%ymm9,%ymm0
@@ -15254,7 +15254,7 @@
 FUNCTION(_sk_dstatop_avx)
 _sk_dstatop_avx:
   .byte  197,100,89,196                      // vmulps        %ymm4,%ymm3,%ymm8
-  .byte  196,98,125,24,13,118,100,0,0        // vbroadcastss  0x6476(%rip),%ymm9        # 679c <_sk_callback_avx+0x148>
+  .byte  196,98,125,24,13,158,98,0,0         // vbroadcastss  0x629e(%rip),%ymm9        # 65c4 <_sk_callback_avx+0x148>
   .byte  197,52,92,207                       // vsubps        %ymm7,%ymm9,%ymm9
   .byte  197,180,89,192                      // vmulps        %ymm0,%ymm9,%ymm0
   .byte  197,188,88,192                      // vaddps        %ymm0,%ymm8,%ymm0
@@ -15296,7 +15296,7 @@
 .globl _sk_srcout_avx
 FUNCTION(_sk_srcout_avx)
 _sk_srcout_avx:
-  .byte  196,98,125,24,5,21,100,0,0          // vbroadcastss  0x6415(%rip),%ymm8        # 67a0 <_sk_callback_avx+0x14c>
+  .byte  196,98,125,24,5,61,98,0,0           // vbroadcastss  0x623d(%rip),%ymm8        # 65c8 <_sk_callback_avx+0x14c>
   .byte  197,60,92,199                       // vsubps        %ymm7,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
@@ -15309,7 +15309,7 @@
 .globl _sk_dstout_avx
 FUNCTION(_sk_dstout_avx)
 _sk_dstout_avx:
-  .byte  196,226,125,24,5,248,99,0,0         // vbroadcastss  0x63f8(%rip),%ymm0        # 67a4 <_sk_callback_avx+0x150>
+  .byte  196,226,125,24,5,32,98,0,0          // vbroadcastss  0x6220(%rip),%ymm0        # 65cc <_sk_callback_avx+0x150>
   .byte  197,252,92,219                      // vsubps        %ymm3,%ymm0,%ymm3
   .byte  197,228,89,196                      // vmulps        %ymm4,%ymm3,%ymm0
   .byte  197,228,89,205                      // vmulps        %ymm5,%ymm3,%ymm1
@@ -15322,7 +15322,7 @@
 .globl _sk_srcover_avx
 FUNCTION(_sk_srcover_avx)
 _sk_srcover_avx:
-  .byte  196,98,125,24,5,219,99,0,0          // vbroadcastss  0x63db(%rip),%ymm8        # 67a8 <_sk_callback_avx+0x154>
+  .byte  196,98,125,24,5,3,98,0,0            // vbroadcastss  0x6203(%rip),%ymm8        # 65d0 <_sk_callback_avx+0x154>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,204                       // vmulps        %ymm4,%ymm8,%ymm9
   .byte  197,180,88,192                      // vaddps        %ymm0,%ymm9,%ymm0
@@ -15339,7 +15339,7 @@
 .globl _sk_dstover_avx
 FUNCTION(_sk_dstover_avx)
 _sk_dstover_avx:
-  .byte  196,98,125,24,5,174,99,0,0          // vbroadcastss  0x63ae(%rip),%ymm8        # 67ac <_sk_callback_avx+0x158>
+  .byte  196,98,125,24,5,214,97,0,0          // vbroadcastss  0x61d6(%rip),%ymm8        # 65d4 <_sk_callback_avx+0x158>
   .byte  197,60,92,199                       // vsubps        %ymm7,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,252,88,196                      // vaddps        %ymm4,%ymm0,%ymm0
@@ -15367,7 +15367,7 @@
 .globl _sk_multiply_avx
 FUNCTION(_sk_multiply_avx)
 _sk_multiply_avx:
-  .byte  196,98,125,24,5,109,99,0,0          // vbroadcastss  0x636d(%rip),%ymm8        # 67b0 <_sk_callback_avx+0x15c>
+  .byte  196,98,125,24,5,149,97,0,0          // vbroadcastss  0x6195(%rip),%ymm8        # 65d8 <_sk_callback_avx+0x15c>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,52,89,208                       // vmulps        %ymm0,%ymm9,%ymm10
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -15427,7 +15427,7 @@
 .globl _sk_xor__avx
 FUNCTION(_sk_xor__avx)
 _sk_xor__avx:
-  .byte  196,98,125,24,5,188,98,0,0          // vbroadcastss  0x62bc(%rip),%ymm8        # 67b4 <_sk_callback_avx+0x160>
+  .byte  196,98,125,24,5,228,96,0,0          // vbroadcastss  0x60e4(%rip),%ymm8        # 65dc <_sk_callback_avx+0x160>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,180,89,192                      // vmulps        %ymm0,%ymm9,%ymm0
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -15464,7 +15464,7 @@
   .byte  197,100,89,206                      // vmulps        %ymm6,%ymm3,%ymm9
   .byte  196,193,108,95,209                  // vmaxps        %ymm9,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,60,98,0,0           // vbroadcastss  0x623c(%rip),%ymm8        # 67b8 <_sk_callback_avx+0x164>
+  .byte  196,98,125,24,5,100,96,0,0          // vbroadcastss  0x6064(%rip),%ymm8        # 65e0 <_sk_callback_avx+0x164>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -15490,7 +15490,7 @@
   .byte  197,100,89,206                      // vmulps        %ymm6,%ymm3,%ymm9
   .byte  196,193,108,93,209                  // vminps        %ymm9,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,232,97,0,0          // vbroadcastss  0x61e8(%rip),%ymm8        # 67bc <_sk_callback_avx+0x168>
+  .byte  196,98,125,24,5,16,96,0,0           // vbroadcastss  0x6010(%rip),%ymm8        # 65e4 <_sk_callback_avx+0x168>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -15519,7 +15519,7 @@
   .byte  196,193,108,93,209                  // vminps        %ymm9,%ymm2,%ymm2
   .byte  197,236,88,210                      // vaddps        %ymm2,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,136,97,0,0          // vbroadcastss  0x6188(%rip),%ymm8        # 67c0 <_sk_callback_avx+0x16c>
+  .byte  196,98,125,24,5,176,95,0,0          // vbroadcastss  0x5fb0(%rip),%ymm8        # 65e8 <_sk_callback_avx+0x16c>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -15542,7 +15542,7 @@
   .byte  197,236,89,214                      // vmulps        %ymm6,%ymm2,%ymm2
   .byte  197,236,88,210                      // vaddps        %ymm2,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,67,97,0,0           // vbroadcastss  0x6143(%rip),%ymm8        # 67c4 <_sk_callback_avx+0x170>
+  .byte  196,98,125,24,5,107,95,0,0          // vbroadcastss  0x5f6b(%rip),%ymm8        # 65ec <_sk_callback_avx+0x170>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -15553,7 +15553,7 @@
 .globl _sk_colorburn_avx
 FUNCTION(_sk_colorburn_avx)
 _sk_colorburn_avx:
-  .byte  196,98,125,24,5,46,97,0,0           // vbroadcastss  0x612e(%rip),%ymm8        # 67c8 <_sk_callback_avx+0x174>
+  .byte  196,98,125,24,5,86,95,0,0           // vbroadcastss  0x5f56(%rip),%ymm8        # 65f0 <_sk_callback_avx+0x174>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,52,89,216                       // vmulps        %ymm0,%ymm9,%ymm11
   .byte  196,65,44,87,210                    // vxorps        %ymm10,%ymm10,%ymm10
@@ -15615,7 +15615,7 @@
 FUNCTION(_sk_colordodge_avx)
 _sk_colordodge_avx:
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
-  .byte  196,98,125,24,13,42,96,0,0          // vbroadcastss  0x602a(%rip),%ymm9        # 67cc <_sk_callback_avx+0x178>
+  .byte  196,98,125,24,13,82,94,0,0          // vbroadcastss  0x5e52(%rip),%ymm9        # 65f4 <_sk_callback_avx+0x178>
   .byte  197,52,92,215                       // vsubps        %ymm7,%ymm9,%ymm10
   .byte  197,44,89,216                       // vmulps        %ymm0,%ymm10,%ymm11
   .byte  197,52,92,203                       // vsubps        %ymm3,%ymm9,%ymm9
@@ -15672,7 +15672,7 @@
 .globl _sk_hardlight_avx
 FUNCTION(_sk_hardlight_avx)
 _sk_hardlight_avx:
-  .byte  196,98,125,24,5,60,95,0,0           // vbroadcastss  0x5f3c(%rip),%ymm8        # 67d0 <_sk_callback_avx+0x17c>
+  .byte  196,98,125,24,5,100,93,0,0          // vbroadcastss  0x5d64(%rip),%ymm8        # 65f8 <_sk_callback_avx+0x17c>
   .byte  197,60,92,215                       // vsubps        %ymm7,%ymm8,%ymm10
   .byte  197,44,89,200                       // vmulps        %ymm0,%ymm10,%ymm9
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -15727,7 +15727,7 @@
 .globl _sk_overlay_avx
 FUNCTION(_sk_overlay_avx)
 _sk_overlay_avx:
-  .byte  196,98,125,24,5,101,94,0,0          // vbroadcastss  0x5e65(%rip),%ymm8        # 67d4 <_sk_callback_avx+0x180>
+  .byte  196,98,125,24,5,141,92,0,0          // vbroadcastss  0x5c8d(%rip),%ymm8        # 65fc <_sk_callback_avx+0x180>
   .byte  197,60,92,215                       // vsubps        %ymm7,%ymm8,%ymm10
   .byte  197,44,89,200                       // vmulps        %ymm0,%ymm10,%ymm9
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -15793,10 +15793,10 @@
   .byte  196,65,60,88,192                    // vaddps        %ymm8,%ymm8,%ymm8
   .byte  196,65,60,89,216                    // vmulps        %ymm8,%ymm8,%ymm11
   .byte  196,65,60,88,195                    // vaddps        %ymm11,%ymm8,%ymm8
-  .byte  196,98,125,24,29,92,93,0,0          // vbroadcastss  0x5d5c(%rip),%ymm11        # 67dc <_sk_callback_avx+0x188>
+  .byte  196,98,125,24,29,132,91,0,0         // vbroadcastss  0x5b84(%rip),%ymm11        # 6604 <_sk_callback_avx+0x188>
   .byte  196,65,28,88,235                    // vaddps        %ymm11,%ymm12,%ymm13
   .byte  196,65,20,89,192                    // vmulps        %ymm8,%ymm13,%ymm8
-  .byte  196,98,125,24,45,77,93,0,0          // vbroadcastss  0x5d4d(%rip),%ymm13        # 67e0 <_sk_callback_avx+0x18c>
+  .byte  196,98,125,24,45,117,91,0,0         // vbroadcastss  0x5b75(%rip),%ymm13        # 6608 <_sk_callback_avx+0x18c>
   .byte  196,65,28,89,245                    // vmulps        %ymm13,%ymm12,%ymm14
   .byte  196,65,12,88,192                    // vaddps        %ymm8,%ymm14,%ymm8
   .byte  196,65,124,82,244                   // vrsqrtps      %ymm12,%ymm14
@@ -15807,7 +15807,7 @@
   .byte  197,4,194,255,2                     // vcmpleps      %ymm7,%ymm15,%ymm15
   .byte  196,67,13,74,240,240                // vblendvps     %ymm15,%ymm8,%ymm14,%ymm14
   .byte  197,116,88,249                      // vaddps        %ymm1,%ymm1,%ymm15
-  .byte  196,98,125,24,5,11,93,0,0           // vbroadcastss  0x5d0b(%rip),%ymm8        # 67d8 <_sk_callback_avx+0x184>
+  .byte  196,98,125,24,5,51,91,0,0           // vbroadcastss  0x5b33(%rip),%ymm8        # 6600 <_sk_callback_avx+0x184>
   .byte  196,65,60,92,228                    // vsubps        %ymm12,%ymm8,%ymm12
   .byte  197,132,92,195                      // vsubps        %ymm3,%ymm15,%ymm0
   .byte  196,65,124,89,228                   // vmulps        %ymm12,%ymm0,%ymm12
@@ -15934,12 +15934,12 @@
   .byte  196,65,28,89,219                    // vmulps        %ymm11,%ymm12,%ymm11
   .byte  196,65,36,94,222                    // vdivps        %ymm14,%ymm11,%ymm11
   .byte  196,67,37,74,224,240                // vblendvps     %ymm15,%ymm8,%ymm11,%ymm12
-  .byte  196,98,125,24,53,218,90,0,0         // vbroadcastss  0x5ada(%rip),%ymm14        # 67e4 <_sk_callback_avx+0x190>
+  .byte  196,98,125,24,53,2,89,0,0           // vbroadcastss  0x5902(%rip),%ymm14        # 660c <_sk_callback_avx+0x190>
   .byte  196,65,92,89,222                    // vmulps        %ymm14,%ymm4,%ymm11
-  .byte  196,98,125,24,61,208,90,0,0         // vbroadcastss  0x5ad0(%rip),%ymm15        # 67e8 <_sk_callback_avx+0x194>
+  .byte  196,98,125,24,61,248,88,0,0         // vbroadcastss  0x58f8(%rip),%ymm15        # 6610 <_sk_callback_avx+0x194>
   .byte  196,65,84,89,239                    // vmulps        %ymm15,%ymm5,%ymm13
   .byte  196,65,36,88,221                    // vaddps        %ymm13,%ymm11,%ymm11
-  .byte  196,226,125,24,5,193,90,0,0         // vbroadcastss  0x5ac1(%rip),%ymm0        # 67ec <_sk_callback_avx+0x198>
+  .byte  196,226,125,24,5,233,88,0,0         // vbroadcastss  0x58e9(%rip),%ymm0        # 6614 <_sk_callback_avx+0x198>
   .byte  197,76,89,232                       // vmulps        %ymm0,%ymm6,%ymm13
   .byte  196,65,36,88,221                    // vaddps        %ymm13,%ymm11,%ymm11
   .byte  196,65,52,89,238                    // vmulps        %ymm14,%ymm9,%ymm13
@@ -16000,7 +16000,7 @@
   .byte  196,65,36,95,208                    // vmaxps        %ymm8,%ymm11,%ymm10
   .byte  196,195,109,74,209,240              // vblendvps     %ymm15,%ymm9,%ymm2,%ymm2
   .byte  196,193,108,95,208                  // vmaxps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,154,89,0,0          // vbroadcastss  0x599a(%rip),%ymm8        # 67f0 <_sk_callback_avx+0x19c>
+  .byte  196,98,125,24,5,194,87,0,0          // vbroadcastss  0x57c2(%rip),%ymm8        # 6618 <_sk_callback_avx+0x19c>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,180,89,201                      // vmulps        %ymm1,%ymm9,%ymm1
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -16057,12 +16057,12 @@
   .byte  196,65,28,89,219                    // vmulps        %ymm11,%ymm12,%ymm11
   .byte  196,65,36,94,222                    // vdivps        %ymm14,%ymm11,%ymm11
   .byte  196,67,37,74,224,240                // vblendvps     %ymm15,%ymm8,%ymm11,%ymm12
-  .byte  196,98,125,24,53,168,88,0,0         // vbroadcastss  0x58a8(%rip),%ymm14        # 67f4 <_sk_callback_avx+0x1a0>
+  .byte  196,98,125,24,53,208,86,0,0         // vbroadcastss  0x56d0(%rip),%ymm14        # 661c <_sk_callback_avx+0x1a0>
   .byte  196,65,92,89,222                    // vmulps        %ymm14,%ymm4,%ymm11
-  .byte  196,98,125,24,61,158,88,0,0         // vbroadcastss  0x589e(%rip),%ymm15        # 67f8 <_sk_callback_avx+0x1a4>
+  .byte  196,98,125,24,61,198,86,0,0         // vbroadcastss  0x56c6(%rip),%ymm15        # 6620 <_sk_callback_avx+0x1a4>
   .byte  196,65,84,89,239                    // vmulps        %ymm15,%ymm5,%ymm13
   .byte  196,65,36,88,221                    // vaddps        %ymm13,%ymm11,%ymm11
-  .byte  196,226,125,24,5,143,88,0,0         // vbroadcastss  0x588f(%rip),%ymm0        # 67fc <_sk_callback_avx+0x1a8>
+  .byte  196,226,125,24,5,183,86,0,0         // vbroadcastss  0x56b7(%rip),%ymm0        # 6624 <_sk_callback_avx+0x1a8>
   .byte  197,76,89,232                       // vmulps        %ymm0,%ymm6,%ymm13
   .byte  196,65,36,88,221                    // vaddps        %ymm13,%ymm11,%ymm11
   .byte  196,65,52,89,238                    // vmulps        %ymm14,%ymm9,%ymm13
@@ -16123,7 +16123,7 @@
   .byte  196,65,36,95,208                    // vmaxps        %ymm8,%ymm11,%ymm10
   .byte  196,195,109,74,209,240              // vblendvps     %ymm15,%ymm9,%ymm2,%ymm2
   .byte  196,193,108,95,208                  // vmaxps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,104,87,0,0          // vbroadcastss  0x5768(%rip),%ymm8        # 6800 <_sk_callback_avx+0x1ac>
+  .byte  196,98,125,24,5,144,85,0,0          // vbroadcastss  0x5590(%rip),%ymm8        # 6628 <_sk_callback_avx+0x1ac>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,180,89,201                      // vmulps        %ymm1,%ymm9,%ymm1
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -16152,12 +16152,12 @@
   .byte  197,252,17,68,36,168                // vmovups       %ymm0,-0x58(%rsp)
   .byte  197,124,89,199                      // vmulps        %ymm7,%ymm0,%ymm8
   .byte  197,116,89,207                      // vmulps        %ymm7,%ymm1,%ymm9
-  .byte  196,98,125,24,45,254,86,0,0         // vbroadcastss  0x56fe(%rip),%ymm13        # 6804 <_sk_callback_avx+0x1b0>
+  .byte  196,98,125,24,45,38,85,0,0          // vbroadcastss  0x5526(%rip),%ymm13        # 662c <_sk_callback_avx+0x1b0>
   .byte  196,65,92,89,213                    // vmulps        %ymm13,%ymm4,%ymm10
-  .byte  196,98,125,24,53,244,86,0,0         // vbroadcastss  0x56f4(%rip),%ymm14        # 6808 <_sk_callback_avx+0x1b4>
+  .byte  196,98,125,24,53,28,85,0,0          // vbroadcastss  0x551c(%rip),%ymm14        # 6630 <_sk_callback_avx+0x1b4>
   .byte  196,65,84,89,222                    // vmulps        %ymm14,%ymm5,%ymm11
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
-  .byte  196,98,125,24,61,229,86,0,0         // vbroadcastss  0x56e5(%rip),%ymm15        # 680c <_sk_callback_avx+0x1b8>
+  .byte  196,98,125,24,61,13,85,0,0          // vbroadcastss  0x550d(%rip),%ymm15        # 6634 <_sk_callback_avx+0x1b8>
   .byte  196,65,76,89,223                    // vmulps        %ymm15,%ymm6,%ymm11
   .byte  196,193,44,88,195                   // vaddps        %ymm11,%ymm10,%ymm0
   .byte  196,65,60,89,221                    // vmulps        %ymm13,%ymm8,%ymm11
@@ -16220,7 +16220,7 @@
   .byte  196,65,44,95,207                    // vmaxps        %ymm15,%ymm10,%ymm9
   .byte  196,195,37,74,192,0                 // vblendvps     %ymm0,%ymm8,%ymm11,%ymm0
   .byte  196,65,124,95,199                   // vmaxps        %ymm15,%ymm0,%ymm8
-  .byte  196,226,125,24,5,172,85,0,0         // vbroadcastss  0x55ac(%rip),%ymm0        # 6810 <_sk_callback_avx+0x1bc>
+  .byte  196,226,125,24,5,212,83,0,0         // vbroadcastss  0x53d4(%rip),%ymm0        # 6638 <_sk_callback_avx+0x1bc>
   .byte  197,124,92,215                      // vsubps        %ymm7,%ymm0,%ymm10
   .byte  197,172,89,84,36,168                // vmulps        -0x58(%rsp),%ymm10,%ymm2
   .byte  197,124,92,219                      // vsubps        %ymm3,%ymm0,%ymm11
@@ -16250,12 +16250,12 @@
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
   .byte  197,100,89,196                      // vmulps        %ymm4,%ymm3,%ymm8
   .byte  197,100,89,205                      // vmulps        %ymm5,%ymm3,%ymm9
-  .byte  196,98,125,24,45,62,85,0,0          // vbroadcastss  0x553e(%rip),%ymm13        # 6814 <_sk_callback_avx+0x1c0>
+  .byte  196,98,125,24,45,102,83,0,0         // vbroadcastss  0x5366(%rip),%ymm13        # 663c <_sk_callback_avx+0x1c0>
   .byte  196,65,108,89,213                   // vmulps        %ymm13,%ymm2,%ymm10
-  .byte  196,98,125,24,53,52,85,0,0          // vbroadcastss  0x5534(%rip),%ymm14        # 6818 <_sk_callback_avx+0x1c4>
+  .byte  196,98,125,24,53,92,83,0,0          // vbroadcastss  0x535c(%rip),%ymm14        # 6640 <_sk_callback_avx+0x1c4>
   .byte  196,65,116,89,222                   // vmulps        %ymm14,%ymm1,%ymm11
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
-  .byte  196,98,125,24,61,37,85,0,0          // vbroadcastss  0x5525(%rip),%ymm15        # 681c <_sk_callback_avx+0x1c8>
+  .byte  196,98,125,24,61,77,83,0,0          // vbroadcastss  0x534d(%rip),%ymm15        # 6644 <_sk_callback_avx+0x1c8>
   .byte  196,65,28,89,223                    // vmulps        %ymm15,%ymm12,%ymm11
   .byte  196,193,44,88,195                   // vaddps        %ymm11,%ymm10,%ymm0
   .byte  196,65,60,89,221                    // vmulps        %ymm13,%ymm8,%ymm11
@@ -16318,7 +16318,7 @@
   .byte  196,65,44,95,207                    // vmaxps        %ymm15,%ymm10,%ymm9
   .byte  196,195,37,74,192,0                 // vblendvps     %ymm0,%ymm8,%ymm11,%ymm0
   .byte  196,65,124,95,199                   // vmaxps        %ymm15,%ymm0,%ymm8
-  .byte  196,226,125,24,5,236,83,0,0         // vbroadcastss  0x53ec(%rip),%ymm0        # 6820 <_sk_callback_avx+0x1cc>
+  .byte  196,226,125,24,5,20,82,0,0          // vbroadcastss  0x5214(%rip),%ymm0        # 6648 <_sk_callback_avx+0x1cc>
   .byte  197,124,92,215                      // vsubps        %ymm7,%ymm0,%ymm10
   .byte  197,172,89,210                      // vmulps        %ymm2,%ymm10,%ymm2
   .byte  197,124,92,219                      // vsubps        %ymm3,%ymm0,%ymm11
@@ -16342,32 +16342,34 @@
 .globl _sk_srcover_rgba_8888_avx
 FUNCTION(_sk_srcover_rgba_8888_avx)
 _sk_srcover_rgba_8888_avx:
+  .byte  73,137,200                          // mov           %rcx,%r8
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,46,1,0,0                     // jne           15bb <_sk_srcover_rgba_8888_avx+0x13c>
-  .byte  196,65,124,16,4,186                 // vmovups       (%r10,%rdi,4),%ymm8
-  .byte  197,124,40,13,37,87,0,0             // vmovaps       0x5725(%rip),%ymm9        # 6bc0 <_sk_callback_avx+0x56c>
-  .byte  196,193,60,84,225                   // vandps        %ymm9,%ymm8,%ymm4
+  .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
+  .byte  76,3,8                              // add           (%rax),%r9
+  .byte  77,133,192                          // test          %r8,%r8
+  .byte  15,133,43,1,0,0                     // jne           15c3 <_sk_srcover_rgba_8888_avx+0x144>
+  .byte  196,193,124,16,57                   // vmovups       (%r9),%ymm7
+  .byte  197,124,40,13,219,85,0,0            // vmovaps       0x55db(%rip),%ymm9        # 6a80 <_sk_callback_avx+0x604>
+  .byte  196,193,68,84,225                   // vandps        %ymm9,%ymm7,%ymm4
   .byte  197,252,91,228                      // vcvtdq2ps     %ymm4,%ymm4
-  .byte  196,193,81,114,208,8                // vpsrld        $0x8,%xmm8,%xmm5
-  .byte  196,99,125,25,199,1                 // vextractf128  $0x1,%ymm8,%xmm7
-  .byte  197,201,114,215,8                   // vpsrld        $0x8,%xmm7,%xmm6
+  .byte  197,209,114,215,8                   // vpsrld        $0x8,%xmm7,%xmm5
+  .byte  196,195,125,25,248,1                // vextractf128  $0x1,%ymm7,%xmm8
+  .byte  196,193,73,114,208,8                // vpsrld        $0x8,%xmm8,%xmm6
   .byte  196,227,85,24,238,1                 // vinsertf128   $0x1,%xmm6,%ymm5,%ymm5
   .byte  196,193,84,84,233                   // vandps        %ymm9,%ymm5,%ymm5
   .byte  197,252,91,237                      // vcvtdq2ps     %ymm5,%ymm5
-  .byte  196,193,41,114,208,16               // vpsrld        $0x10,%xmm8,%xmm10
-  .byte  197,201,114,215,16                  // vpsrld        $0x10,%xmm7,%xmm6
+  .byte  197,169,114,215,16                  // vpsrld        $0x10,%xmm7,%xmm10
+  .byte  196,193,73,114,208,16               // vpsrld        $0x10,%xmm8,%xmm6
   .byte  196,227,45,24,246,1                 // vinsertf128   $0x1,%xmm6,%ymm10,%ymm6
   .byte  196,193,76,84,241                   // vandps        %ymm9,%ymm6,%ymm6
   .byte  197,252,91,246                      // vcvtdq2ps     %ymm6,%ymm6
-  .byte  196,193,57,114,208,24               // vpsrld        $0x18,%xmm8,%xmm8
-  .byte  197,193,114,215,24                  // vpsrld        $0x18,%xmm7,%xmm7
-  .byte  196,227,61,24,255,1                 // vinsertf128   $0x1,%xmm7,%ymm8,%ymm7
+  .byte  197,177,114,215,24                  // vpsrld        $0x18,%xmm7,%xmm9
+  .byte  196,193,65,114,208,24               // vpsrld        $0x18,%xmm8,%xmm7
+  .byte  196,227,53,24,255,1                 // vinsertf128   $0x1,%xmm7,%ymm9,%ymm7
   .byte  197,252,91,255                      // vcvtdq2ps     %ymm7,%ymm7
-  .byte  196,98,125,24,5,40,83,0,0           // vbroadcastss  0x5328(%rip),%ymm8        # 6824 <_sk_callback_avx+0x1d0>
+  .byte  196,98,125,24,5,70,81,0,0           // vbroadcastss  0x5146(%rip),%ymm8        # 664c <_sk_callback_avx+0x1d0>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
-  .byte  196,98,125,24,13,31,83,0,0          // vbroadcastss  0x531f(%rip),%ymm9        # 6828 <_sk_callback_avx+0x1d4>
+  .byte  196,98,125,24,13,61,81,0,0          // vbroadcastss  0x513d(%rip),%ymm9        # 6650 <_sk_callback_avx+0x1d4>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  197,60,89,212                       // vmulps        %ymm4,%ymm8,%ymm10
   .byte  196,193,124,88,194                  // vaddps        %ymm10,%ymm0,%ymm0
@@ -16399,97 +16401,40 @@
   .byte  196,67,37,24,210,1                  // vinsertf128   $0x1,%xmm10,%ymm11,%ymm10
   .byte  196,65,53,86,202                    // vorpd         %ymm10,%ymm9,%ymm9
   .byte  196,65,61,86,193                    // vorpd         %ymm9,%ymm8,%ymm8
-  .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,183,0,0,0                    // jne           1668 <_sk_srcover_rgba_8888_avx+0x1e9>
-  .byte  196,65,124,17,4,186                 // vmovups       %ymm8,(%r10,%rdi,4)
+  .byte  77,133,192                          // test          %r8,%r8
+  .byte  117,87                              // jne           160e <_sk_srcover_rgba_8888_avx+0x18f>
+  .byte  196,65,124,17,1                     // vmovups       %ymm8,(%r9)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  76,137,193                          // mov           %r8,%rcx
   .byte  255,224                             // jmpq          *%rax
-  .byte  65,137,200                          // mov           %ecx,%r8d
-  .byte  65,128,224,7                        // and           $0x7,%r8b
-  .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
-  .byte  65,254,200                          // dec           %r8b
-  .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  15,135,191,254,255,255              // ja            1493 <_sk_srcover_rgba_8888_avx+0x14>
-  .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,253,0,0,0                 // lea           0xfd(%rip),%r9        # 16dc <_sk_srcover_rgba_8888_avx+0x25d>
-  .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
-  .byte  76,1,200                            // add           %r9,%rax
-  .byte  255,224                             // jmpq          *%rax
-  .byte  196,193,121,110,100,186,24          // vmovd         0x18(%r10,%rdi,4),%xmm4
-  .byte  197,249,112,228,68                  // vpshufd       $0x44,%xmm4,%xmm4
-  .byte  196,227,125,24,228,1                // vinsertf128   $0x1,%xmm4,%ymm0,%ymm4
-  .byte  197,212,87,237                      // vxorps        %ymm5,%ymm5,%ymm5
-  .byte  196,99,85,12,196,64                 // vblendps      $0x40,%ymm4,%ymm5,%ymm8
-  .byte  196,99,125,25,196,1                 // vextractf128  $0x1,%ymm8,%xmm4
-  .byte  196,195,89,34,100,186,20,1          // vpinsrd       $0x1,0x14(%r10,%rdi,4),%xmm4,%xmm4
-  .byte  196,99,61,24,196,1                  // vinsertf128   $0x1,%xmm4,%ymm8,%ymm8
-  .byte  196,99,125,25,196,1                 // vextractf128  $0x1,%ymm8,%xmm4
-  .byte  196,195,89,34,100,186,16,0          // vpinsrd       $0x0,0x10(%r10,%rdi,4),%xmm4,%xmm4
-  .byte  196,99,61,24,196,1                  // vinsertf128   $0x1,%xmm4,%ymm8,%ymm8
-  .byte  196,195,57,34,100,186,12,3          // vpinsrd       $0x3,0xc(%r10,%rdi,4),%xmm8,%xmm4
-  .byte  196,99,61,12,196,15                 // vblendps      $0xf,%ymm4,%ymm8,%ymm8
-  .byte  196,195,57,34,100,186,8,2           // vpinsrd       $0x2,0x8(%r10,%rdi,4),%xmm8,%xmm4
-  .byte  196,99,61,12,196,15                 // vblendps      $0xf,%ymm4,%ymm8,%ymm8
-  .byte  196,195,57,34,100,186,4,1           // vpinsrd       $0x1,0x4(%r10,%rdi,4),%xmm8,%xmm4
-  .byte  196,99,61,12,196,15                 // vblendps      $0xf,%ymm4,%ymm8,%ymm8
-  .byte  196,195,57,34,36,186,0              // vpinsrd       $0x0,(%r10,%rdi,4),%xmm8,%xmm4
-  .byte  196,99,61,12,196,15                 // vblendps      $0xf,%ymm4,%ymm8,%ymm8
-  .byte  233,43,254,255,255                  // jmpq          1493 <_sk_srcover_rgba_8888_avx+0x14>
-  .byte  65,137,200                          // mov           %ecx,%r8d
-  .byte  65,128,224,7                        // and           $0x7,%r8b
-  .byte  65,254,200                          // dec           %r8b
-  .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  15,135,59,255,255,255               // ja            15b7 <_sk_srcover_rgba_8888_avx+0x138>
-  .byte  65,15,182,192                       // movzbl        %r8b,%eax
-  .byte  76,141,5,113,0,0,0                  // lea           0x71(%rip),%r8        # 16f8 <_sk_srcover_rgba_8888_avx+0x279>
-  .byte  73,99,4,128                         // movslq        (%r8,%rax,4),%rax
-  .byte  76,1,192                            // add           %r8,%rax
-  .byte  255,224                             // jmpq          *%rax
-  .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
-  .byte  196,67,121,22,76,186,24,2           // vpextrd       $0x2,%xmm9,0x18(%r10,%rdi,4)
-  .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
-  .byte  196,67,121,22,76,186,20,1           // vpextrd       $0x1,%xmm9,0x14(%r10,%rdi,4)
-  .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
-  .byte  196,65,122,17,76,186,16             // vmovss        %xmm9,0x10(%r10,%rdi,4)
-  .byte  196,67,121,22,68,186,12,3           // vpextrd       $0x3,%xmm8,0xc(%r10,%rdi,4)
-  .byte  196,67,121,22,68,186,8,2            // vpextrd       $0x2,%xmm8,0x8(%r10,%rdi,4)
-  .byte  196,67,121,22,68,186,4,1            // vpextrd       $0x1,%xmm8,0x4(%r10,%rdi,4)
-  .byte  196,65,121,126,4,186                // vmovd         %xmm8,(%r10,%rdi,4)
-  .byte  233,219,254,255,255                 // jmpq          15b7 <_sk_srcover_rgba_8888_avx+0x138>
-  .byte  122,255                             // jp            16dd <_sk_srcover_rgba_8888_avx+0x25e>
-  .byte  255                                 // (bad)
-  .byte  255,108,255,255                     // ljmp          *-0x1(%rdi,%rdi,8)
-  .byte  255,94,255                          // lcall         *-0x1(%rsi)
-  .byte  255                                 // (bad)
-  .byte  255,80,255                          // callq         *-0x1(%rax)
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  60,255                              // cmp           $0xff,%al
-  .byte  255                                 // (bad)
-  .byte  255,40                              // ljmp          *(%rax)
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255,12,255                          // decl          (%rdi,%rdi,8)
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  217,255                             // fcos
-  .byte  255                                 // (bad)
-  .byte  255,209                             // callq         *%rcx
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255,201                             // dec           %ecx
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255,193                             // inc           %ecx
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255,180,255,255,255,166,255         // pushq         -0x590001(%rdi,%rdi,8)
-  .byte  255                                 // (bad)
-  .byte  255                                 // .byte         0xff
-  .byte  152                                 // cwtl
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255                                 // .byte         0xff
+  .byte  185,8,0,0,0                         // mov           $0x8,%ecx
+  .byte  68,41,193                           // sub           %r8d,%ecx
+  .byte  192,225,3                           // shl           $0x3,%cl
+  .byte  72,199,192,255,255,255,255          // mov           $0xffffffffffffffff,%rax
+  .byte  72,211,232                          // shr           %cl,%rax
+  .byte  196,225,249,110,224                 // vmovq         %rax,%xmm4
+  .byte  196,226,121,48,228                  // vpmovzxbw     %xmm4,%xmm4
+  .byte  196,226,89,0,45,245,83,0,0          // vpshufb       0x53f5(%rip),%xmm4,%xmm5        # 69e0 <_sk_callback_avx+0x564>
+  .byte  196,226,121,33,237                  // vpmovsxbd     %xmm5,%xmm5
+  .byte  196,226,89,0,37,247,83,0,0          // vpshufb       0x53f7(%rip),%xmm4,%xmm4        # 69f0 <_sk_callback_avx+0x574>
+  .byte  196,226,121,33,228                  // vpmovsxbd     %xmm4,%xmm4
+  .byte  196,227,85,24,228,1                 // vinsertf128   $0x1,%xmm4,%ymm5,%ymm4
+  .byte  196,194,93,44,57                    // vmaskmovps    (%r9),%ymm4,%ymm7
+  .byte  233,143,254,255,255                 // jmpq          149d <_sk_srcover_rgba_8888_avx+0x1e>
+  .byte  185,8,0,0,0                         // mov           $0x8,%ecx
+  .byte  68,41,193                           // sub           %r8d,%ecx
+  .byte  192,225,3                           // shl           $0x3,%cl
+  .byte  72,199,192,255,255,255,255          // mov           $0xffffffffffffffff,%rax
+  .byte  72,211,232                          // shr           %cl,%rax
+  .byte  196,97,249,110,200                  // vmovq         %rax,%xmm9
+  .byte  196,66,121,48,201                   // vpmovzxbw     %xmm9,%xmm9
+  .byte  196,98,49,0,21,170,83,0,0           // vpshufb       0x53aa(%rip),%xmm9,%xmm10        # 69e0 <_sk_callback_avx+0x564>
+  .byte  196,66,121,33,210                   // vpmovsxbd     %xmm10,%xmm10
+  .byte  196,98,49,0,13,172,83,0,0           // vpshufb       0x53ac(%rip),%xmm9,%xmm9        # 69f0 <_sk_callback_avx+0x574>
+  .byte  196,66,121,33,201                   // vpmovsxbd     %xmm9,%xmm9
+  .byte  196,67,45,24,201,1                  // vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
+  .byte  196,66,53,46,1                      // vmaskmovps    %ymm8,%ymm9,(%r9)
+  .byte  233,99,255,255,255                  // jmpq          15bc <_sk_srcover_rgba_8888_avx+0x13d>
 
 HIDDEN _sk_clamp_0_avx
 .globl _sk_clamp_0_avx
@@ -16507,7 +16452,7 @@
 .globl _sk_clamp_1_avx
 FUNCTION(_sk_clamp_1_avx)
 _sk_clamp_1_avx:
-  .byte  196,98,125,24,5,242,80,0,0          // vbroadcastss  0x50f2(%rip),%ymm8        # 682c <_sk_callback_avx+0x1d8>
+  .byte  196,98,125,24,5,213,79,0,0          // vbroadcastss  0x4fd5(%rip),%ymm8        # 6654 <_sk_callback_avx+0x1d8>
   .byte  196,193,124,93,192                  // vminps        %ymm8,%ymm0,%ymm0
   .byte  196,193,116,93,200                  // vminps        %ymm8,%ymm1,%ymm1
   .byte  196,193,108,93,208                  // vminps        %ymm8,%ymm2,%ymm2
@@ -16519,7 +16464,7 @@
 .globl _sk_clamp_a_avx
 FUNCTION(_sk_clamp_a_avx)
 _sk_clamp_a_avx:
-  .byte  196,98,125,24,5,213,80,0,0          // vbroadcastss  0x50d5(%rip),%ymm8        # 6830 <_sk_callback_avx+0x1dc>
+  .byte  196,98,125,24,5,184,79,0,0          // vbroadcastss  0x4fb8(%rip),%ymm8        # 6658 <_sk_callback_avx+0x1dc>
   .byte  196,193,100,93,216                  // vminps        %ymm8,%ymm3,%ymm3
   .byte  197,252,93,195                      // vminps        %ymm3,%ymm0,%ymm0
   .byte  197,244,93,203                      // vminps        %ymm3,%ymm1,%ymm1
@@ -16605,7 +16550,7 @@
 _sk_unpremul_avx:
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,65,100,194,200,0                // vcmpeqps      %ymm8,%ymm3,%ymm9
-  .byte  196,98,125,24,21,29,80,0,0          // vbroadcastss  0x501d(%rip),%ymm10        # 6834 <_sk_callback_avx+0x1e0>
+  .byte  196,98,125,24,21,0,79,0,0           // vbroadcastss  0x4f00(%rip),%ymm10        # 665c <_sk_callback_avx+0x1e0>
   .byte  197,44,94,211                       // vdivps        %ymm3,%ymm10,%ymm10
   .byte  196,67,45,74,192,144                // vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
@@ -16618,17 +16563,17 @@
 .globl _sk_from_srgb_avx
 FUNCTION(_sk_from_srgb_avx)
 _sk_from_srgb_avx:
-  .byte  196,98,125,24,5,254,79,0,0          // vbroadcastss  0x4ffe(%rip),%ymm8        # 6838 <_sk_callback_avx+0x1e4>
+  .byte  196,98,125,24,5,225,78,0,0          // vbroadcastss  0x4ee1(%rip),%ymm8        # 6660 <_sk_callback_avx+0x1e4>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  197,124,89,208                      // vmulps        %ymm0,%ymm0,%ymm10
-  .byte  196,98,125,24,29,240,79,0,0         // vbroadcastss  0x4ff0(%rip),%ymm11        # 683c <_sk_callback_avx+0x1e8>
+  .byte  196,98,125,24,29,211,78,0,0         // vbroadcastss  0x4ed3(%rip),%ymm11        # 6664 <_sk_callback_avx+0x1e8>
   .byte  196,65,124,89,227                   // vmulps        %ymm11,%ymm0,%ymm12
-  .byte  196,98,125,24,45,230,79,0,0         // vbroadcastss  0x4fe6(%rip),%ymm13        # 6840 <_sk_callback_avx+0x1ec>
+  .byte  196,98,125,24,45,201,78,0,0         // vbroadcastss  0x4ec9(%rip),%ymm13        # 6668 <_sk_callback_avx+0x1ec>
   .byte  196,65,28,88,229                    // vaddps        %ymm13,%ymm12,%ymm12
   .byte  196,65,44,89,212                    // vmulps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,37,215,79,0,0         // vbroadcastss  0x4fd7(%rip),%ymm12        # 6844 <_sk_callback_avx+0x1f0>
+  .byte  196,98,125,24,37,186,78,0,0         // vbroadcastss  0x4eba(%rip),%ymm12        # 666c <_sk_callback_avx+0x1f0>
   .byte  196,65,44,88,212                    // vaddps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,53,205,79,0,0         // vbroadcastss  0x4fcd(%rip),%ymm14        # 6848 <_sk_callback_avx+0x1f4>
+  .byte  196,98,125,24,53,176,78,0,0         // vbroadcastss  0x4eb0(%rip),%ymm14        # 6670 <_sk_callback_avx+0x1f4>
   .byte  196,193,124,194,198,1               // vcmpltps      %ymm14,%ymm0,%ymm0
   .byte  196,195,45,74,193,0                 // vblendvps     %ymm0,%ymm9,%ymm10,%ymm0
   .byte  196,65,116,89,200                   // vmulps        %ymm8,%ymm1,%ymm9
@@ -16655,20 +16600,20 @@
 FUNCTION(_sk_to_srgb_avx)
 _sk_to_srgb_avx:
   .byte  197,124,82,200                      // vrsqrtps      %ymm0,%ymm9
-  .byte  196,98,125,24,5,98,79,0,0           // vbroadcastss  0x4f62(%rip),%ymm8        # 684c <_sk_callback_avx+0x1f8>
+  .byte  196,98,125,24,5,69,78,0,0           // vbroadcastss  0x4e45(%rip),%ymm8        # 6674 <_sk_callback_avx+0x1f8>
   .byte  196,65,124,89,208                   // vmulps        %ymm8,%ymm0,%ymm10
-  .byte  196,98,125,24,29,88,79,0,0          // vbroadcastss  0x4f58(%rip),%ymm11        # 6850 <_sk_callback_avx+0x1fc>
+  .byte  196,98,125,24,29,59,78,0,0          // vbroadcastss  0x4e3b(%rip),%ymm11        # 6678 <_sk_callback_avx+0x1fc>
   .byte  196,65,52,89,227                    // vmulps        %ymm11,%ymm9,%ymm12
-  .byte  196,98,125,24,45,78,79,0,0          // vbroadcastss  0x4f4e(%rip),%ymm13        # 6854 <_sk_callback_avx+0x200>
+  .byte  196,98,125,24,45,49,78,0,0          // vbroadcastss  0x4e31(%rip),%ymm13        # 667c <_sk_callback_avx+0x200>
   .byte  196,65,28,88,229                    // vaddps        %ymm13,%ymm12,%ymm12
   .byte  196,65,52,89,228                    // vmulps        %ymm12,%ymm9,%ymm12
-  .byte  196,98,125,24,53,63,79,0,0          // vbroadcastss  0x4f3f(%rip),%ymm14        # 6858 <_sk_callback_avx+0x204>
+  .byte  196,98,125,24,53,34,78,0,0          // vbroadcastss  0x4e22(%rip),%ymm14        # 6680 <_sk_callback_avx+0x204>
   .byte  196,65,28,88,230                    // vaddps        %ymm14,%ymm12,%ymm12
-  .byte  196,98,125,24,61,53,79,0,0          // vbroadcastss  0x4f35(%rip),%ymm15        # 685c <_sk_callback_avx+0x208>
+  .byte  196,98,125,24,61,24,78,0,0          // vbroadcastss  0x4e18(%rip),%ymm15        # 6684 <_sk_callback_avx+0x208>
   .byte  196,65,52,88,207                    // vaddps        %ymm15,%ymm9,%ymm9
   .byte  196,65,124,83,201                   // vrcpps        %ymm9,%ymm9
   .byte  196,65,52,89,204                    // vmulps        %ymm12,%ymm9,%ymm9
-  .byte  196,98,125,24,37,33,79,0,0          // vbroadcastss  0x4f21(%rip),%ymm12        # 6860 <_sk_callback_avx+0x20c>
+  .byte  196,98,125,24,37,4,78,0,0           // vbroadcastss  0x4e04(%rip),%ymm12        # 6688 <_sk_callback_avx+0x20c>
   .byte  196,193,124,194,196,1               // vcmpltps      %ymm12,%ymm0,%ymm0
   .byte  196,195,53,74,194,0                 // vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   .byte  197,124,82,201                      // vrsqrtps      %ymm1,%ymm9
@@ -16705,7 +16650,7 @@
   .byte  197,124,93,201                      // vminps        %ymm1,%ymm0,%ymm9
   .byte  197,52,93,202                       // vminps        %ymm2,%ymm9,%ymm9
   .byte  196,65,60,92,209                    // vsubps        %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,29,135,78,0,0         // vbroadcastss  0x4e87(%rip),%ymm11        # 6864 <_sk_callback_avx+0x210>
+  .byte  196,98,125,24,29,106,77,0,0         // vbroadcastss  0x4d6a(%rip),%ymm11        # 668c <_sk_callback_avx+0x210>
   .byte  196,65,36,94,218                    // vdivps        %ymm10,%ymm11,%ymm11
   .byte  197,116,92,226                      // vsubps        %ymm2,%ymm1,%ymm12
   .byte  196,65,28,89,227                    // vmulps        %ymm11,%ymm12,%ymm12
@@ -16715,19 +16660,19 @@
   .byte  196,193,108,89,211                  // vmulps        %ymm11,%ymm2,%ymm2
   .byte  197,252,92,201                      // vsubps        %ymm1,%ymm0,%ymm1
   .byte  196,193,116,89,203                  // vmulps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,96,78,0,0          // vbroadcastss  0x4e60(%rip),%ymm11        # 6870 <_sk_callback_avx+0x21c>
+  .byte  196,98,125,24,29,67,77,0,0          // vbroadcastss  0x4d43(%rip),%ymm11        # 6698 <_sk_callback_avx+0x21c>
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,78,78,0,0          // vbroadcastss  0x4e4e(%rip),%ymm11        # 686c <_sk_callback_avx+0x218>
+  .byte  196,98,125,24,29,49,77,0,0          // vbroadcastss  0x4d31(%rip),%ymm11        # 6694 <_sk_callback_avx+0x218>
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
   .byte  196,227,117,74,202,224              // vblendvps     %ymm14,%ymm2,%ymm1,%ymm1
-  .byte  196,226,125,24,21,54,78,0,0         // vbroadcastss  0x4e36(%rip),%ymm2        # 6868 <_sk_callback_avx+0x214>
+  .byte  196,226,125,24,21,25,77,0,0         // vbroadcastss  0x4d19(%rip),%ymm2        # 6690 <_sk_callback_avx+0x214>
   .byte  196,65,12,87,246                    // vxorps        %ymm14,%ymm14,%ymm14
   .byte  196,227,13,74,210,208               // vblendvps     %ymm13,%ymm2,%ymm14,%ymm2
   .byte  197,188,194,192,0                   // vcmpeqps      %ymm0,%ymm8,%ymm0
   .byte  196,193,108,88,212                  // vaddps        %ymm12,%ymm2,%ymm2
   .byte  196,227,117,74,194,0                // vblendvps     %ymm0,%ymm2,%ymm1,%ymm0
   .byte  196,193,60,88,201                   // vaddps        %ymm9,%ymm8,%ymm1
-  .byte  196,98,125,24,37,29,78,0,0          // vbroadcastss  0x4e1d(%rip),%ymm12        # 6878 <_sk_callback_avx+0x224>
+  .byte  196,98,125,24,37,0,77,0,0           // vbroadcastss  0x4d00(%rip),%ymm12        # 66a0 <_sk_callback_avx+0x224>
   .byte  196,193,116,89,212                  // vmulps        %ymm12,%ymm1,%ymm2
   .byte  197,28,194,226,1                    // vcmpltps      %ymm2,%ymm12,%ymm12
   .byte  196,65,36,92,216                    // vsubps        %ymm8,%ymm11,%ymm11
@@ -16737,7 +16682,7 @@
   .byte  197,172,94,201                      // vdivps        %ymm1,%ymm10,%ymm1
   .byte  196,195,125,74,198,128              // vblendvps     %ymm8,%ymm14,%ymm0,%ymm0
   .byte  196,195,117,74,206,128              // vblendvps     %ymm8,%ymm14,%ymm1,%ymm1
-  .byte  196,98,125,24,5,224,77,0,0          // vbroadcastss  0x4de0(%rip),%ymm8        # 6874 <_sk_callback_avx+0x220>
+  .byte  196,98,125,24,5,195,76,0,0          // vbroadcastss  0x4cc3(%rip),%ymm8        # 669c <_sk_callback_avx+0x220>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -16754,7 +16699,7 @@
   .byte  197,252,17,92,36,128                // vmovups       %ymm3,-0x80(%rsp)
   .byte  197,252,40,225                      // vmovaps       %ymm1,%ymm4
   .byte  197,252,40,216                      // vmovaps       %ymm0,%ymm3
-  .byte  196,98,125,24,5,173,77,0,0          // vbroadcastss  0x4dad(%rip),%ymm8        # 687c <_sk_callback_avx+0x228>
+  .byte  196,98,125,24,5,144,76,0,0          // vbroadcastss  0x4c90(%rip),%ymm8        # 66a4 <_sk_callback_avx+0x228>
   .byte  197,60,194,202,2                    // vcmpleps      %ymm2,%ymm8,%ymm9
   .byte  197,92,89,210                       // vmulps        %ymm2,%ymm4,%ymm10
   .byte  196,65,92,92,218                    // vsubps        %ymm10,%ymm4,%ymm11
@@ -16762,23 +16707,23 @@
   .byte  197,52,88,210                       // vaddps        %ymm2,%ymm9,%ymm10
   .byte  197,108,88,202                      // vaddps        %ymm2,%ymm2,%ymm9
   .byte  196,65,52,92,202                    // vsubps        %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,29,135,77,0,0         // vbroadcastss  0x4d87(%rip),%ymm11        # 6880 <_sk_callback_avx+0x22c>
+  .byte  196,98,125,24,29,106,76,0,0         // vbroadcastss  0x4c6a(%rip),%ymm11        # 66a8 <_sk_callback_avx+0x22c>
   .byte  196,65,100,88,219                   // vaddps        %ymm11,%ymm3,%ymm11
   .byte  196,67,125,8,227,1                  // vroundps      $0x1,%ymm11,%ymm12
   .byte  196,65,36,92,252                    // vsubps        %ymm12,%ymm11,%ymm15
   .byte  196,65,44,92,217                    // vsubps        %ymm9,%ymm10,%ymm11
-  .byte  196,98,125,24,37,113,77,0,0         // vbroadcastss  0x4d71(%rip),%ymm12        # 6888 <_sk_callback_avx+0x234>
+  .byte  196,98,125,24,37,84,76,0,0          // vbroadcastss  0x4c54(%rip),%ymm12        # 66b0 <_sk_callback_avx+0x234>
   .byte  196,193,4,89,196                    // vmulps        %ymm12,%ymm15,%ymm0
-  .byte  196,98,125,24,45,103,77,0,0         // vbroadcastss  0x4d67(%rip),%ymm13        # 688c <_sk_callback_avx+0x238>
+  .byte  196,98,125,24,45,74,76,0,0          // vbroadcastss  0x4c4a(%rip),%ymm13        # 66b4 <_sk_callback_avx+0x238>
   .byte  197,20,92,240                       // vsubps        %ymm0,%ymm13,%ymm14
   .byte  196,65,36,89,246                    // vmulps        %ymm14,%ymm11,%ymm14
   .byte  196,65,52,88,246                    // vaddps        %ymm14,%ymm9,%ymm14
-  .byte  196,226,125,24,13,72,77,0,0         // vbroadcastss  0x4d48(%rip),%ymm1        # 6884 <_sk_callback_avx+0x230>
+  .byte  196,226,125,24,13,43,76,0,0         // vbroadcastss  0x4c2b(%rip),%ymm1        # 66ac <_sk_callback_avx+0x230>
   .byte  196,193,116,194,255,2               // vcmpleps      %ymm15,%ymm1,%ymm7
   .byte  196,195,13,74,249,112               // vblendvps     %ymm7,%ymm9,%ymm14,%ymm7
   .byte  196,65,60,194,247,2                 // vcmpleps      %ymm15,%ymm8,%ymm14
   .byte  196,227,45,74,255,224               // vblendvps     %ymm14,%ymm7,%ymm10,%ymm7
-  .byte  196,98,125,24,53,51,77,0,0          // vbroadcastss  0x4d33(%rip),%ymm14        # 6890 <_sk_callback_avx+0x23c>
+  .byte  196,98,125,24,53,22,76,0,0          // vbroadcastss  0x4c16(%rip),%ymm14        # 66b8 <_sk_callback_avx+0x23c>
   .byte  196,65,12,194,255,2                 // vcmpleps      %ymm15,%ymm14,%ymm15
   .byte  196,193,124,89,195                  // vmulps        %ymm11,%ymm0,%ymm0
   .byte  197,180,88,192                      // vaddps        %ymm0,%ymm9,%ymm0
@@ -16797,7 +16742,7 @@
   .byte  197,164,89,247                      // vmulps        %ymm7,%ymm11,%ymm6
   .byte  197,180,88,246                      // vaddps        %ymm6,%ymm9,%ymm6
   .byte  196,227,77,74,237,0                 // vblendvps     %ymm0,%ymm5,%ymm6,%ymm5
-  .byte  196,226,125,24,5,213,76,0,0         // vbroadcastss  0x4cd5(%rip),%ymm0        # 6894 <_sk_callback_avx+0x240>
+  .byte  196,226,125,24,5,184,75,0,0         // vbroadcastss  0x4bb8(%rip),%ymm0        # 66bc <_sk_callback_avx+0x240>
   .byte  197,228,88,192                      // vaddps        %ymm0,%ymm3,%ymm0
   .byte  196,227,125,8,216,1                 // vroundps      $0x1,%ymm0,%ymm3
   .byte  197,252,92,195                      // vsubps        %ymm3,%ymm0,%ymm0
@@ -16849,14 +16794,14 @@
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,1,248                            // add           %rdi,%rax
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,68                              // jne           1cb6 <_sk_scale_u8_avx+0x54>
+  .byte  117,68                              // jne           1bfb <_sk_scale_u8_avx+0x54>
   .byte  197,122,126,0                       // vmovq         (%rax),%xmm8
   .byte  196,66,121,49,200                   // vpmovzxbd     %xmm8,%xmm9
   .byte  196,67,121,4,192,229                // vpermilps     $0xe5,%xmm8,%xmm8
   .byte  196,66,121,49,192                   // vpmovzxbd     %xmm8,%xmm8
   .byte  196,67,53,24,192,1                  // vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,254,75,0,0         // vbroadcastss  0x4bfe(%rip),%ymm9        # 6898 <_sk_callback_avx+0x244>
+  .byte  196,98,125,24,13,225,74,0,0         // vbroadcastss  0x4ae1(%rip),%ymm9        # 66c0 <_sk_callback_avx+0x244>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
@@ -16874,9 +16819,9 @@
   .byte  77,9,217                            // or            %r11,%r9
   .byte  72,131,193,8                        // add           $0x8,%rcx
   .byte  73,255,202                          // dec           %r10
-  .byte  117,234                             // jne           1cbe <_sk_scale_u8_avx+0x5c>
+  .byte  117,234                             // jne           1c03 <_sk_scale_u8_avx+0x5c>
   .byte  196,65,249,110,193                  // vmovq         %r9,%xmm8
-  .byte  235,155                             // jmp           1c76 <_sk_scale_u8_avx+0x14>
+  .byte  235,155                             // jmp           1bbb <_sk_scale_u8_avx+0x14>
 
 HIDDEN _sk_lerp_1_float_avx
 .globl _sk_lerp_1_float_avx
@@ -16908,14 +16853,14 @@
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,1,248                            // add           %rdi,%rax
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,104                             // jne           1d92 <_sk_lerp_u8_avx+0x78>
+  .byte  117,104                             // jne           1cd7 <_sk_lerp_u8_avx+0x78>
   .byte  197,122,126,0                       // vmovq         (%rax),%xmm8
   .byte  196,66,121,49,200                   // vpmovzxbd     %xmm8,%xmm9
   .byte  196,67,121,4,192,229                // vpermilps     $0xe5,%xmm8,%xmm8
   .byte  196,66,121,49,192                   // vpmovzxbd     %xmm8,%xmm8
   .byte  196,67,53,24,192,1                  // vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,74,75,0,0          // vbroadcastss  0x4b4a(%rip),%ymm9        # 689c <_sk_callback_avx+0x248>
+  .byte  196,98,125,24,13,45,74,0,0          // vbroadcastss  0x4a2d(%rip),%ymm9        # 66c4 <_sk_callback_avx+0x248>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,252,92,196                      // vsubps        %ymm4,%ymm0,%ymm0
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
@@ -16941,9 +16886,9 @@
   .byte  77,9,217                            // or            %r11,%r9
   .byte  72,131,193,8                        // add           $0x8,%rcx
   .byte  73,255,202                          // dec           %r10
-  .byte  117,234                             // jne           1d9a <_sk_lerp_u8_avx+0x80>
+  .byte  117,234                             // jne           1cdf <_sk_lerp_u8_avx+0x80>
   .byte  196,65,249,110,193                  // vmovq         %r9,%xmm8
-  .byte  233,116,255,255,255                 // jmpq          1d2e <_sk_lerp_u8_avx+0x14>
+  .byte  233,116,255,255,255                 // jmpq          1c73 <_sk_lerp_u8_avx+0x14>
 
 HIDDEN _sk_lerp_565_avx
 .globl _sk_lerp_565_avx
@@ -16952,26 +16897,26 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,208,0,0,0                    // jne           1e98 <_sk_lerp_565_avx+0xde>
+  .byte  15,133,208,0,0,0                    // jne           1ddd <_sk_lerp_565_avx+0xde>
   .byte  196,65,122,111,4,122                // vmovdqu       (%r10,%rdi,2),%xmm8
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  196,65,57,105,201                   // vpunpckhwd    %xmm9,%xmm8,%xmm9
   .byte  196,66,121,51,192                   // vpmovzxwd     %xmm8,%xmm8
   .byte  196,67,61,24,193,1                  // vinsertf128   $0x1,%xmm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,180,74,0,0         // vbroadcastss  0x4ab4(%rip),%ymm9        # 68a0 <_sk_callback_avx+0x24c>
+  .byte  196,98,125,24,13,151,73,0,0         // vbroadcastss  0x4997(%rip),%ymm9        # 66c8 <_sk_callback_avx+0x24c>
   .byte  196,65,60,84,201                    // vandps        %ymm9,%ymm8,%ymm9
   .byte  196,65,124,91,201                   // vcvtdq2ps     %ymm9,%ymm9
-  .byte  196,98,125,24,21,165,74,0,0         // vbroadcastss  0x4aa5(%rip),%ymm10        # 68a4 <_sk_callback_avx+0x250>
+  .byte  196,98,125,24,21,136,73,0,0         // vbroadcastss  0x4988(%rip),%ymm10        # 66cc <_sk_callback_avx+0x250>
   .byte  196,65,52,89,202                    // vmulps        %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,21,155,74,0,0         // vbroadcastss  0x4a9b(%rip),%ymm10        # 68a8 <_sk_callback_avx+0x254>
+  .byte  196,98,125,24,21,126,73,0,0         // vbroadcastss  0x497e(%rip),%ymm10        # 66d0 <_sk_callback_avx+0x254>
   .byte  196,65,60,84,210                    // vandps        %ymm10,%ymm8,%ymm10
   .byte  196,65,124,91,210                   // vcvtdq2ps     %ymm10,%ymm10
-  .byte  196,98,125,24,29,140,74,0,0         // vbroadcastss  0x4a8c(%rip),%ymm11        # 68ac <_sk_callback_avx+0x258>
+  .byte  196,98,125,24,29,111,73,0,0         // vbroadcastss  0x496f(%rip),%ymm11        # 66d4 <_sk_callback_avx+0x258>
   .byte  196,65,44,89,211                    // vmulps        %ymm11,%ymm10,%ymm10
-  .byte  196,98,125,24,29,130,74,0,0         // vbroadcastss  0x4a82(%rip),%ymm11        # 68b0 <_sk_callback_avx+0x25c>
+  .byte  196,98,125,24,29,101,73,0,0         // vbroadcastss  0x4965(%rip),%ymm11        # 66d8 <_sk_callback_avx+0x25c>
   .byte  196,65,60,84,195                    // vandps        %ymm11,%ymm8,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,29,115,74,0,0         // vbroadcastss  0x4a73(%rip),%ymm11        # 68b4 <_sk_callback_avx+0x260>
+  .byte  196,98,125,24,29,86,73,0,0          // vbroadcastss  0x4956(%rip),%ymm11        # 66dc <_sk_callback_avx+0x260>
   .byte  196,65,60,89,195                    // vmulps        %ymm11,%ymm8,%ymm8
   .byte  197,252,92,196                      // vsubps        %ymm4,%ymm0,%ymm0
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
@@ -16998,9 +16943,9 @@
   .byte  196,65,57,239,192                   // vpxor         %xmm8,%xmm8,%xmm8
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  15,135,29,255,255,255               // ja            1dce <_sk_lerp_565_avx+0x14>
+  .byte  15,135,29,255,255,255               // ja            1d13 <_sk_lerp_565_avx+0x14>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,76,0,0,0                  // lea           0x4c(%rip),%r9        # 1f08 <_sk_lerp_565_avx+0x14e>
+  .byte  76,141,13,75,0,0,0                  // lea           0x4b(%rip),%r9        # 1e4c <_sk_lerp_565_avx+0x14d>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -17012,28 +16957,27 @@
   .byte  196,65,57,196,68,122,4,2            // vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm8,%xmm8
   .byte  196,65,57,196,68,122,2,1            // vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm8,%xmm8
   .byte  196,65,57,196,4,122,0               // vpinsrw       $0x0,(%r10,%rdi,2),%xmm8,%xmm8
-  .byte  233,200,254,255,255                 // jmpq          1dce <_sk_lerp_565_avx+0x14>
-  .byte  102,144                             // xchg          %ax,%ax
-  .byte  242,255                             // repnz         (bad)
+  .byte  233,200,254,255,255                 // jmpq          1d13 <_sk_lerp_565_avx+0x14>
+  .byte  144                                 // nop
+  .byte  243,255                             // repz          (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  234                                 // (bad)
+  .byte  235,255                             // jmp           1e51 <_sk_lerp_565_avx+0x152>
   .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255,226                             // jmpq          *%rdx
+  .byte  255,227                             // jmpq          *%rbx
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  218,255                             // (bad)
+  .byte  219,255                             // (bad)
   .byte  255                                 // (bad)
-  .byte  255,210                             // callq         *%rdx
+  .byte  255,211                             // callq         *%rbx
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,202                             // dec           %edx
+  .byte  255,203                             // dec           %ebx
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  189                                 // .byte         0xbd
+  .byte  190                                 // .byte         0xbe
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // .byte         0xff
@@ -17042,19 +16986,21 @@
 .globl _sk_load_tables_avx
 FUNCTION(_sk_load_tables_avx)
 _sk_load_tables_avx:
+  .byte  73,137,200                          // mov           %rcx,%r8
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  76,139,0                            // mov           (%rax),%r8
-  .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,26,2,0,0                     // jne           214c <_sk_load_tables_avx+0x228>
-  .byte  196,65,124,16,4,184                 // vmovups       (%r8,%rdi,4),%ymm8
+  .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
+  .byte  76,3,8                              // add           (%rax),%r9
+  .byte  77,133,192                          // test          %r8,%r8
+  .byte  15,133,26,2,0,0                     // jne           209b <_sk_load_tables_avx+0x233>
+  .byte  196,65,124,16,17                    // vmovups       (%r9),%ymm10
   .byte  85                                  // push          %rbp
   .byte  65,87                               // push          %r15
   .byte  65,86                               // push          %r14
   .byte  65,85                               // push          %r13
   .byte  65,84                               // push          %r12
   .byte  83                                  // push          %rbx
-  .byte  197,124,40,13,150,76,0,0            // vmovaps       0x4c96(%rip),%ymm9        # 6be0 <_sk_callback_avx+0x58c>
-  .byte  196,193,60,84,193                   // vandps        %ymm9,%ymm8,%ymm0
+  .byte  197,124,40,13,8,76,0,0              // vmovaps       0x4c08(%rip),%ymm9        # 6aa0 <_sk_callback_avx+0x624>
+  .byte  196,193,44,84,193                   // vandps        %ymm9,%ymm10,%ymm0
   .byte  196,193,249,126,193                 // vmovq         %xmm0,%r9
   .byte  69,137,203                          // mov           %r9d,%r11d
   .byte  196,195,249,22,194,1                // vpextrq       $0x1,%xmm0,%r10
@@ -17062,26 +17008,26 @@
   .byte  73,193,234,32                       // shr           $0x20,%r10
   .byte  73,193,233,32                       // shr           $0x20,%r9
   .byte  196,227,125,25,192,1                // vextractf128  $0x1,%ymm0,%xmm0
-  .byte  196,193,249,126,196                 // vmovq         %xmm0,%r12
-  .byte  69,137,231                          // mov           %r12d,%r15d
-  .byte  196,227,249,22,195,1                // vpextrq       $0x1,%xmm0,%rbx
-  .byte  65,137,221                          // mov           %ebx,%r13d
+  .byte  196,225,249,126,195                 // vmovq         %xmm0,%rbx
+  .byte  65,137,223                          // mov           %ebx,%r15d
+  .byte  196,227,249,22,193,1                // vpextrq       $0x1,%xmm0,%rcx
+  .byte  65,137,205                          // mov           %ecx,%r13d
+  .byte  72,193,233,32                       // shr           $0x20,%rcx
   .byte  72,193,235,32                       // shr           $0x20,%rbx
-  .byte  73,193,236,32                       // shr           $0x20,%r12
   .byte  72,139,104,8                        // mov           0x8(%rax),%rbp
-  .byte  76,139,64,16                        // mov           0x10(%rax),%r8
+  .byte  76,139,96,16                        // mov           0x10(%rax),%r12
   .byte  196,161,122,16,68,189,0             // vmovss        0x0(%rbp,%r15,4),%xmm0
-  .byte  196,163,121,33,68,165,0,16          // vinsertps     $0x10,0x0(%rbp,%r12,4),%xmm0,%xmm0
+  .byte  196,227,121,33,68,157,0,16          // vinsertps     $0x10,0x0(%rbp,%rbx,4),%xmm0,%xmm0
   .byte  196,163,121,33,68,173,0,32          // vinsertps     $0x20,0x0(%rbp,%r13,4),%xmm0,%xmm0
-  .byte  196,227,121,33,68,157,0,48          // vinsertps     $0x30,0x0(%rbp,%rbx,4),%xmm0,%xmm0
+  .byte  196,227,121,33,68,141,0,48          // vinsertps     $0x30,0x0(%rbp,%rcx,4),%xmm0,%xmm0
   .byte  196,161,122,16,76,157,0             // vmovss        0x0(%rbp,%r11,4),%xmm1
   .byte  196,163,113,33,76,141,0,16          // vinsertps     $0x10,0x0(%rbp,%r9,4),%xmm1,%xmm1
   .byte  196,163,113,33,76,181,0,32          // vinsertps     $0x20,0x0(%rbp,%r14,4),%xmm1,%xmm1
   .byte  196,163,113,33,76,149,0,48          // vinsertps     $0x30,0x0(%rbp,%r10,4),%xmm1,%xmm1
   .byte  196,227,117,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
-  .byte  196,193,113,114,208,8               // vpsrld        $0x8,%xmm8,%xmm1
-  .byte  196,67,125,25,194,1                 // vextractf128  $0x1,%ymm8,%xmm10
-  .byte  196,193,105,114,210,8               // vpsrld        $0x8,%xmm10,%xmm2
+  .byte  196,193,113,114,210,8               // vpsrld        $0x8,%xmm10,%xmm1
+  .byte  196,67,125,25,208,1                 // vextractf128  $0x1,%ymm10,%xmm8
+  .byte  196,193,105,114,208,8               // vpsrld        $0x8,%xmm8,%xmm2
   .byte  196,227,117,24,202,1                // vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   .byte  196,193,116,84,201                  // vandps        %ymm9,%ymm1,%ymm1
   .byte  196,193,249,126,201                 // vmovq         %xmm1,%r9
@@ -17091,36 +17037,36 @@
   .byte  73,193,234,32                       // shr           $0x20,%r10
   .byte  73,193,233,32                       // shr           $0x20,%r9
   .byte  196,227,125,25,201,1                // vextractf128  $0x1,%ymm1,%xmm1
-  .byte  196,225,249,126,205                 // vmovq         %xmm1,%rbp
-  .byte  65,137,239                          // mov           %ebp,%r15d
-  .byte  196,227,249,22,203,1                // vpextrq       $0x1,%xmm1,%rbx
-  .byte  65,137,220                          // mov           %ebx,%r12d
-  .byte  72,193,235,32                       // shr           $0x20,%rbx
+  .byte  196,225,249,126,203                 // vmovq         %xmm1,%rbx
+  .byte  65,137,223                          // mov           %ebx,%r15d
+  .byte  196,227,249,22,205,1                // vpextrq       $0x1,%xmm1,%rbp
+  .byte  137,233                             // mov           %ebp,%ecx
   .byte  72,193,237,32                       // shr           $0x20,%rbp
-  .byte  196,129,122,16,12,184               // vmovss        (%r8,%r15,4),%xmm1
-  .byte  196,195,113,33,12,168,16            // vinsertps     $0x10,(%r8,%rbp,4),%xmm1,%xmm1
-  .byte  196,129,122,16,20,160               // vmovss        (%r8,%r12,4),%xmm2
+  .byte  72,193,235,32                       // shr           $0x20,%rbx
+  .byte  196,129,122,16,12,188               // vmovss        (%r12,%r15,4),%xmm1
+  .byte  196,195,113,33,12,156,16            // vinsertps     $0x10,(%r12,%rbx,4),%xmm1,%xmm1
+  .byte  196,193,122,16,20,140               // vmovss        (%r12,%rcx,4),%xmm2
   .byte  196,227,113,33,202,32               // vinsertps     $0x20,%xmm2,%xmm1,%xmm1
-  .byte  196,193,122,16,20,152               // vmovss        (%r8,%rbx,4),%xmm2
+  .byte  196,193,122,16,20,172               // vmovss        (%r12,%rbp,4),%xmm2
   .byte  196,227,113,33,202,48               // vinsertps     $0x30,%xmm2,%xmm1,%xmm1
-  .byte  196,129,122,16,20,152               // vmovss        (%r8,%r11,4),%xmm2
-  .byte  196,131,105,33,20,136,16            // vinsertps     $0x10,(%r8,%r9,4),%xmm2,%xmm2
-  .byte  196,129,122,16,28,176               // vmovss        (%r8,%r14,4),%xmm3
+  .byte  196,129,122,16,20,156               // vmovss        (%r12,%r11,4),%xmm2
+  .byte  196,131,105,33,20,140,16            // vinsertps     $0x10,(%r12,%r9,4),%xmm2,%xmm2
+  .byte  196,129,122,16,28,180               // vmovss        (%r12,%r14,4),%xmm3
   .byte  196,227,105,33,211,32               // vinsertps     $0x20,%xmm3,%xmm2,%xmm2
-  .byte  196,129,122,16,28,144               // vmovss        (%r8,%r10,4),%xmm3
+  .byte  196,129,122,16,28,148               // vmovss        (%r12,%r10,4),%xmm3
   .byte  196,227,105,33,211,48               // vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   .byte  196,227,109,24,201,1                // vinsertf128   $0x1,%xmm1,%ymm2,%ymm1
   .byte  72,139,64,24                        // mov           0x18(%rax),%rax
-  .byte  196,193,105,114,208,16              // vpsrld        $0x10,%xmm8,%xmm2
-  .byte  196,193,97,114,210,16               // vpsrld        $0x10,%xmm10,%xmm3
+  .byte  196,193,105,114,210,16              // vpsrld        $0x10,%xmm10,%xmm2
+  .byte  196,193,97,114,208,16               // vpsrld        $0x10,%xmm8,%xmm3
   .byte  196,227,109,24,211,1                // vinsertf128   $0x1,%xmm3,%ymm2,%ymm2
   .byte  196,193,108,84,209                  // vandps        %ymm9,%ymm2,%ymm2
-  .byte  196,193,249,126,208                 // vmovq         %xmm2,%r8
-  .byte  69,137,194                          // mov           %r8d,%r10d
-  .byte  196,195,249,22,209,1                // vpextrq       $0x1,%xmm2,%r9
-  .byte  69,137,203                          // mov           %r9d,%r11d
+  .byte  196,193,249,126,209                 // vmovq         %xmm2,%r9
+  .byte  69,137,202                          // mov           %r9d,%r10d
+  .byte  196,227,249,22,209,1                // vpextrq       $0x1,%xmm2,%rcx
+  .byte  65,137,203                          // mov           %ecx,%r11d
+  .byte  72,193,233,32                       // shr           $0x20,%rcx
   .byte  73,193,233,32                       // shr           $0x20,%r9
-  .byte  73,193,232,32                       // shr           $0x20,%r8
   .byte  196,227,125,25,210,1                // vextractf128  $0x1,%ymm2,%xmm2
   .byte  196,225,249,126,213                 // vmovq         %xmm2,%rbp
   .byte  65,137,238                          // mov           %ebp,%r14d
@@ -17135,19 +17081,20 @@
   .byte  197,250,16,28,152                   // vmovss        (%rax,%rbx,4),%xmm3
   .byte  196,99,105,33,203,48                // vinsertps     $0x30,%xmm3,%xmm2,%xmm9
   .byte  196,161,122,16,28,144               // vmovss        (%rax,%r10,4),%xmm3
-  .byte  196,163,97,33,28,128,16             // vinsertps     $0x10,(%rax,%r8,4),%xmm3,%xmm3
+  .byte  196,163,97,33,28,136,16             // vinsertps     $0x10,(%rax,%r9,4),%xmm3,%xmm3
   .byte  196,161,122,16,20,152               // vmovss        (%rax,%r11,4),%xmm2
   .byte  196,227,97,33,210,32                // vinsertps     $0x20,%xmm2,%xmm3,%xmm2
-  .byte  196,161,122,16,28,136               // vmovss        (%rax,%r9,4),%xmm3
+  .byte  197,250,16,28,136                   // vmovss        (%rax,%rcx,4),%xmm3
   .byte  196,227,105,33,211,48               // vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   .byte  196,195,109,24,209,1                // vinsertf128   $0x1,%xmm9,%ymm2,%ymm2
-  .byte  196,193,57,114,208,24               // vpsrld        $0x18,%xmm8,%xmm8
-  .byte  196,193,97,114,210,24               // vpsrld        $0x18,%xmm10,%xmm3
-  .byte  196,227,61,24,219,1                 // vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
+  .byte  196,193,49,114,210,24               // vpsrld        $0x18,%xmm10,%xmm9
+  .byte  196,193,97,114,208,24               // vpsrld        $0x18,%xmm8,%xmm3
+  .byte  196,227,53,24,219,1                 // vinsertf128   $0x1,%xmm3,%ymm9,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,127,71,0,0          // vbroadcastss  0x477f(%rip),%ymm8        # 68b8 <_sk_callback_avx+0x264>
+  .byte  196,98,125,24,5,91,70,0,0           // vbroadcastss  0x465b(%rip),%ymm8        # 66e0 <_sk_callback_avx+0x264>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  76,137,193                          // mov           %r8,%rcx
   .byte  91                                  // pop           %rbx
   .byte  65,92                               // pop           %r12
   .byte  65,93                               // pop           %r13
@@ -17155,57 +17102,20 @@
   .byte  65,95                               // pop           %r15
   .byte  93                                  // pop           %rbp
   .byte  255,224                             // jmpq          *%rax
-  .byte  65,137,201                          // mov           %ecx,%r9d
-  .byte  65,128,225,7                        // and           $0x7,%r9b
-  .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
-  .byte  65,254,201                          // dec           %r9b
-  .byte  65,128,249,6                        // cmp           $0x6,%r9b
-  .byte  15,135,211,253,255,255              // ja            1f38 <_sk_load_tables_avx+0x14>
-  .byte  69,15,182,201                       // movzbl        %r9b,%r9d
-  .byte  76,141,21,140,0,0,0                 // lea           0x8c(%rip),%r10        # 21fc <_sk_load_tables_avx+0x2d8>
-  .byte  79,99,12,138                        // movslq        (%r10,%r9,4),%r9
-  .byte  77,1,209                            // add           %r10,%r9
-  .byte  65,255,225                          // jmpq          *%r9
-  .byte  196,193,121,110,68,184,24           // vmovd         0x18(%r8,%rdi,4),%xmm0
-  .byte  197,249,112,192,68                  // vpshufd       $0x44,%xmm0,%xmm0
-  .byte  196,227,125,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
-  .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
-  .byte  196,99,117,12,192,64                // vblendps      $0x40,%ymm0,%ymm1,%ymm8
-  .byte  196,99,125,25,192,1                 // vextractf128  $0x1,%ymm8,%xmm0
-  .byte  196,195,121,34,68,184,20,1          // vpinsrd       $0x1,0x14(%r8,%rdi,4),%xmm0,%xmm0
-  .byte  196,99,61,24,192,1                  // vinsertf128   $0x1,%xmm0,%ymm8,%ymm8
-  .byte  196,99,125,25,192,1                 // vextractf128  $0x1,%ymm8,%xmm0
-  .byte  196,195,121,34,68,184,16,0          // vpinsrd       $0x0,0x10(%r8,%rdi,4),%xmm0,%xmm0
-  .byte  196,99,61,24,192,1                  // vinsertf128   $0x1,%xmm0,%ymm8,%ymm8
-  .byte  196,195,57,34,68,184,12,3           // vpinsrd       $0x3,0xc(%r8,%rdi,4),%xmm8,%xmm0
-  .byte  196,99,61,12,192,15                 // vblendps      $0xf,%ymm0,%ymm8,%ymm8
-  .byte  196,195,57,34,68,184,8,2            // vpinsrd       $0x2,0x8(%r8,%rdi,4),%xmm8,%xmm0
-  .byte  196,99,61,12,192,15                 // vblendps      $0xf,%ymm0,%ymm8,%ymm8
-  .byte  196,195,57,34,68,184,4,1            // vpinsrd       $0x1,0x4(%r8,%rdi,4),%xmm8,%xmm0
-  .byte  196,99,61,12,192,15                 // vblendps      $0xf,%ymm0,%ymm8,%ymm8
-  .byte  196,195,57,34,4,184,0               // vpinsrd       $0x0,(%r8,%rdi,4),%xmm8,%xmm0
-  .byte  196,99,61,12,192,15                 // vblendps      $0xf,%ymm0,%ymm8,%ymm8
-  .byte  233,62,253,255,255                  // jmpq          1f38 <_sk_load_tables_avx+0x14>
-  .byte  102,144                             // xchg          %ax,%ax
-  .byte  236                                 // in            (%dx),%al
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  222,255                             // fdivrp        %st,%st(7)
-  .byte  255                                 // (bad)
-  .byte  255,208                             // callq         *%rax
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255,194                             // inc           %edx
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255,174,255,255,255,154             // ljmp          *-0x65000001(%rsi)
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  126,255                             // jle           2215 <_sk_load_tables_avx+0x2f1>
-  .byte  255                                 // (bad)
-  .byte  255                                 // .byte         0xff
+  .byte  185,8,0,0,0                         // mov           $0x8,%ecx
+  .byte  68,41,193                           // sub           %r8d,%ecx
+  .byte  192,225,3                           // shl           $0x3,%cl
+  .byte  73,199,194,255,255,255,255          // mov           $0xffffffffffffffff,%r10
+  .byte  73,211,234                          // shr           %cl,%r10
+  .byte  196,193,249,110,194                 // vmovq         %r10,%xmm0
+  .byte  196,226,121,48,192                  // vpmovzxbw     %xmm0,%xmm0
+  .byte  196,226,121,0,13,61,73,0,0          // vpshufb       0x493d(%rip),%xmm0,%xmm1        # 6a00 <_sk_callback_avx+0x584>
+  .byte  196,226,121,33,201                  // vpmovsxbd     %xmm1,%xmm1
+  .byte  196,226,121,0,5,63,73,0,0           // vpshufb       0x493f(%rip),%xmm0,%xmm0        # 6a10 <_sk_callback_avx+0x594>
+  .byte  196,226,121,33,192                  // vpmovsxbd     %xmm0,%xmm0
+  .byte  196,227,117,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
+  .byte  196,66,125,44,17                    // vmaskmovps    (%r9),%ymm0,%ymm10
+  .byte  233,160,253,255,255                 // jmpq          1e86 <_sk_load_tables_avx+0x1e>
 
 HIDDEN _sk_load_tables_u16_be_avx
 .globl _sk_load_tables_u16_be_avx
@@ -17215,7 +17125,7 @@
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,113,2,0,0                    // jne           249f <_sk_load_tables_u16_be_avx+0x287>
+  .byte  15,133,113,2,0,0                    // jne           236d <_sk_load_tables_u16_be_avx+0x287>
   .byte  196,1,121,16,4,72                   // vmovupd       (%r8,%r9,2),%xmm8
   .byte  196,129,121,16,84,72,16             // vmovupd       0x10(%r8,%r9,2),%xmm2
   .byte  196,129,121,16,92,72,32             // vmovupd       0x20(%r8,%r9,2),%xmm3
@@ -17237,7 +17147,7 @@
   .byte  197,177,108,208                     // vpunpcklqdq   %xmm0,%xmm9,%xmm2
   .byte  197,177,109,200                     // vpunpckhqdq   %xmm0,%xmm9,%xmm1
   .byte  196,65,57,108,212                   // vpunpcklqdq   %xmm12,%xmm8,%xmm10
-  .byte  197,121,111,29,214,73,0,0           // vmovdqa       0x49d6(%rip),%xmm11        # 6c60 <_sk_callback_avx+0x60c>
+  .byte  197,121,111,29,200,72,0,0           // vmovdqa       0x48c8(%rip),%xmm11        # 6a20 <_sk_callback_avx+0x5a4>
   .byte  196,193,105,219,195                 // vpand         %xmm11,%xmm2,%xmm0
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  196,193,121,105,209                 // vpunpckhwd    %xmm9,%xmm0,%xmm2
@@ -17336,7 +17246,7 @@
   .byte  196,226,121,51,219                  // vpmovzxwd     %xmm3,%xmm3
   .byte  196,195,101,24,216,1                // vinsertf128   $0x1,%xmm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,48,68,0,0           // vbroadcastss  0x4430(%rip),%ymm8        # 68bc <_sk_callback_avx+0x268>
+  .byte  196,98,125,24,5,138,67,0,0          // vbroadcastss  0x438a(%rip),%ymm8        # 66e4 <_sk_callback_avx+0x268>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  91                                  // pop           %rbx
@@ -17349,29 +17259,29 @@
   .byte  196,1,123,16,4,72                   // vmovsd        (%r8,%r9,2),%xmm8
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,85                              // je            2505 <_sk_load_tables_u16_be_avx+0x2ed>
+  .byte  116,85                              // je            23d3 <_sk_load_tables_u16_be_avx+0x2ed>
   .byte  196,1,57,22,68,72,8                 // vmovhpd       0x8(%r8,%r9,2),%xmm8,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,72                              // jb            2505 <_sk_load_tables_u16_be_avx+0x2ed>
+  .byte  114,72                              // jb            23d3 <_sk_load_tables_u16_be_avx+0x2ed>
   .byte  196,129,123,16,84,72,16             // vmovsd        0x10(%r8,%r9,2),%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  116,72                              // je            2512 <_sk_load_tables_u16_be_avx+0x2fa>
+  .byte  116,72                              // je            23e0 <_sk_load_tables_u16_be_avx+0x2fa>
   .byte  196,129,105,22,84,72,24             // vmovhpd       0x18(%r8,%r9,2),%xmm2,%xmm2
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,59                              // jb            2512 <_sk_load_tables_u16_be_avx+0x2fa>
+  .byte  114,59                              // jb            23e0 <_sk_load_tables_u16_be_avx+0x2fa>
   .byte  196,129,123,16,92,72,32             // vmovsd        0x20(%r8,%r9,2),%xmm3
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  15,132,97,253,255,255               // je            2249 <_sk_load_tables_u16_be_avx+0x31>
+  .byte  15,132,97,253,255,255               // je            2117 <_sk_load_tables_u16_be_avx+0x31>
   .byte  196,129,97,22,92,72,40              // vmovhpd       0x28(%r8,%r9,2),%xmm3,%xmm3
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  15,130,80,253,255,255               // jb            2249 <_sk_load_tables_u16_be_avx+0x31>
+  .byte  15,130,80,253,255,255               // jb            2117 <_sk_load_tables_u16_be_avx+0x31>
   .byte  196,1,122,126,76,72,48              // vmovq         0x30(%r8,%r9,2),%xmm9
-  .byte  233,68,253,255,255                  // jmpq          2249 <_sk_load_tables_u16_be_avx+0x31>
+  .byte  233,68,253,255,255                  // jmpq          2117 <_sk_load_tables_u16_be_avx+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
   .byte  197,233,87,210                      // vxorpd        %xmm2,%xmm2,%xmm2
-  .byte  233,55,253,255,255                  // jmpq          2249 <_sk_load_tables_u16_be_avx+0x31>
+  .byte  233,55,253,255,255                  // jmpq          2117 <_sk_load_tables_u16_be_avx+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
-  .byte  233,46,253,255,255                  // jmpq          2249 <_sk_load_tables_u16_be_avx+0x31>
+  .byte  233,46,253,255,255                  // jmpq          2117 <_sk_load_tables_u16_be_avx+0x31>
 
 HIDDEN _sk_load_tables_rgb_u16_be_avx
 .globl _sk_load_tables_rgb_u16_be_avx
@@ -17381,7 +17291,7 @@
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,141,12,127                       // lea           (%rdi,%rdi,2),%r9
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,93,2,0,0                     // jne           278a <_sk_load_tables_rgb_u16_be_avx+0x26f>
+  .byte  15,133,93,2,0,0                     // jne           2658 <_sk_load_tables_rgb_u16_be_avx+0x26f>
   .byte  196,129,122,111,4,72                // vmovdqu       (%r8,%r9,2),%xmm0
   .byte  196,129,122,111,84,72,12            // vmovdqu       0xc(%r8,%r9,2),%xmm2
   .byte  196,129,122,111,76,72,24            // vmovdqu       0x18(%r8,%r9,2),%xmm1
@@ -17408,7 +17318,7 @@
   .byte  197,185,108,202                     // vpunpcklqdq   %xmm2,%xmm8,%xmm1
   .byte  197,185,109,210                     // vpunpckhqdq   %xmm2,%xmm8,%xmm2
   .byte  197,121,108,195                     // vpunpcklqdq   %xmm3,%xmm0,%xmm8
-  .byte  197,121,111,13,207,70,0,0           // vmovdqa       0x46cf(%rip),%xmm9        # 6c70 <_sk_callback_avx+0x61c>
+  .byte  197,121,111,13,193,69,0,0           // vmovdqa       0x45c1(%rip),%xmm9        # 6a30 <_sk_callback_avx+0x5b4>
   .byte  196,193,113,219,193                 // vpand         %xmm9,%xmm1,%xmm0
   .byte  196,65,41,239,210                   // vpxor         %xmm10,%xmm10,%xmm10
   .byte  196,193,121,105,202                 // vpunpckhwd    %xmm10,%xmm0,%xmm1
@@ -17500,7 +17410,7 @@
   .byte  196,227,105,33,211,48               // vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   .byte  196,195,109,24,208,1                // vinsertf128   $0x1,%xmm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,66,65,0,0         // vbroadcastss  0x4142(%rip),%ymm3        # 68c0 <_sk_callback_avx+0x26c>
+  .byte  196,226,125,24,29,156,64,0,0        // vbroadcastss  0x409c(%rip),%ymm3        # 66e8 <_sk_callback_avx+0x26c>
   .byte  91                                  // pop           %rbx
   .byte  65,92                               // pop           %r12
   .byte  65,93                               // pop           %r13
@@ -17511,36 +17421,36 @@
   .byte  196,129,121,110,4,72                // vmovd         (%r8,%r9,2),%xmm0
   .byte  196,129,121,196,68,72,4,2           // vpinsrw       $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  117,5                               // jne           27a3 <_sk_load_tables_rgb_u16_be_avx+0x288>
-  .byte  233,190,253,255,255                 // jmpq          2561 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  117,5                               // jne           2671 <_sk_load_tables_rgb_u16_be_avx+0x288>
+  .byte  233,190,253,255,255                 // jmpq          242f <_sk_load_tables_rgb_u16_be_avx+0x46>
   .byte  196,129,121,110,76,72,6             // vmovd         0x6(%r8,%r9,2),%xmm1
   .byte  196,1,113,196,68,72,10,2            // vpinsrw       $0x2,0xa(%r8,%r9,2),%xmm1,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,26                              // jb            27d2 <_sk_load_tables_rgb_u16_be_avx+0x2b7>
+  .byte  114,26                              // jb            26a0 <_sk_load_tables_rgb_u16_be_avx+0x2b7>
   .byte  196,129,121,110,76,72,12            // vmovd         0xc(%r8,%r9,2),%xmm1
   .byte  196,129,113,196,84,72,16,2          // vpinsrw       $0x2,0x10(%r8,%r9,2),%xmm1,%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  117,10                              // jne           27d7 <_sk_load_tables_rgb_u16_be_avx+0x2bc>
-  .byte  233,143,253,255,255                 // jmpq          2561 <_sk_load_tables_rgb_u16_be_avx+0x46>
-  .byte  233,138,253,255,255                 // jmpq          2561 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  117,10                              // jne           26a5 <_sk_load_tables_rgb_u16_be_avx+0x2bc>
+  .byte  233,143,253,255,255                 // jmpq          242f <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  233,138,253,255,255                 // jmpq          242f <_sk_load_tables_rgb_u16_be_avx+0x46>
   .byte  196,129,121,110,76,72,18            // vmovd         0x12(%r8,%r9,2),%xmm1
   .byte  196,1,113,196,76,72,22,2            // vpinsrw       $0x2,0x16(%r8,%r9,2),%xmm1,%xmm9
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,26                              // jb            2806 <_sk_load_tables_rgb_u16_be_avx+0x2eb>
+  .byte  114,26                              // jb            26d4 <_sk_load_tables_rgb_u16_be_avx+0x2eb>
   .byte  196,129,121,110,76,72,24            // vmovd         0x18(%r8,%r9,2),%xmm1
   .byte  196,129,113,196,76,72,28,2          // vpinsrw       $0x2,0x1c(%r8,%r9,2),%xmm1,%xmm1
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  117,10                              // jne           280b <_sk_load_tables_rgb_u16_be_avx+0x2f0>
-  .byte  233,91,253,255,255                  // jmpq          2561 <_sk_load_tables_rgb_u16_be_avx+0x46>
-  .byte  233,86,253,255,255                  // jmpq          2561 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  117,10                              // jne           26d9 <_sk_load_tables_rgb_u16_be_avx+0x2f0>
+  .byte  233,91,253,255,255                  // jmpq          242f <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  233,86,253,255,255                  // jmpq          242f <_sk_load_tables_rgb_u16_be_avx+0x46>
   .byte  196,129,121,110,92,72,30            // vmovd         0x1e(%r8,%r9,2),%xmm3
   .byte  196,1,97,196,92,72,34,2             // vpinsrw       $0x2,0x22(%r8,%r9,2),%xmm3,%xmm11
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,20                              // jb            2834 <_sk_load_tables_rgb_u16_be_avx+0x319>
+  .byte  114,20                              // jb            2702 <_sk_load_tables_rgb_u16_be_avx+0x319>
   .byte  196,129,121,110,92,72,36            // vmovd         0x24(%r8,%r9,2),%xmm3
   .byte  196,129,97,196,92,72,40,2           // vpinsrw       $0x2,0x28(%r8,%r9,2),%xmm3,%xmm3
-  .byte  233,45,253,255,255                  // jmpq          2561 <_sk_load_tables_rgb_u16_be_avx+0x46>
-  .byte  233,40,253,255,255                  // jmpq          2561 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  233,45,253,255,255                  // jmpq          242f <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  233,40,253,255,255                  // jmpq          242f <_sk_load_tables_rgb_u16_be_avx+0x46>
 
 HIDDEN _sk_byte_tables_avx
 .globl _sk_byte_tables_avx
@@ -17553,7 +17463,7 @@
   .byte  65,84                               // push          %r12
   .byte  83                                  // push          %rbx
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,118,64,0,0          // vbroadcastss  0x4076(%rip),%ymm8        # 68c4 <_sk_callback_avx+0x270>
+  .byte  196,98,125,24,5,208,63,0,0          // vbroadcastss  0x3fd0(%rip),%ymm8        # 66ec <_sk_callback_avx+0x270>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
   .byte  197,253,91,192                      // vcvtps2dq     %ymm0,%ymm0
   .byte  196,195,249,22,192,1                // vpextrq       $0x1,%xmm0,%r8
@@ -17590,7 +17500,7 @@
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,53,24,192,1                 // vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,196,63,0,0         // vbroadcastss  0x3fc4(%rip),%ymm9        # 68c8 <_sk_callback_avx+0x274>
+  .byte  196,98,125,24,13,30,63,0,0          // vbroadcastss  0x3f1e(%rip),%ymm9        # 66f0 <_sk_callback_avx+0x274>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
@@ -17752,7 +17662,7 @@
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,53,24,192,1                 // vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,234,60,0,0         // vbroadcastss  0x3cea(%rip),%ymm9        # 68cc <_sk_callback_avx+0x278>
+  .byte  196,98,125,24,13,68,60,0,0          // vbroadcastss  0x3c44(%rip),%ymm9        # 66f4 <_sk_callback_avx+0x278>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
@@ -18049,36 +17959,36 @@
   .byte  196,193,124,88,195                  // vaddps        %ymm11,%ymm0,%ymm0
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,216                      // vcvtdq2ps     %ymm0,%ymm11
-  .byte  196,98,125,24,37,72,56,0,0          // vbroadcastss  0x3848(%rip),%ymm12        # 68d0 <_sk_callback_avx+0x27c>
+  .byte  196,98,125,24,37,162,55,0,0         // vbroadcastss  0x37a2(%rip),%ymm12        # 66f8 <_sk_callback_avx+0x27c>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,62,56,0,0          // vbroadcastss  0x383e(%rip),%ymm12        # 68d4 <_sk_callback_avx+0x280>
+  .byte  196,98,125,24,37,152,55,0,0         // vbroadcastss  0x3798(%rip),%ymm12        # 66fc <_sk_callback_avx+0x280>
   .byte  196,193,124,84,196                  // vandps        %ymm12,%ymm0,%ymm0
-  .byte  196,98,125,24,37,52,56,0,0          // vbroadcastss  0x3834(%rip),%ymm12        # 68d8 <_sk_callback_avx+0x284>
+  .byte  196,98,125,24,37,142,55,0,0         // vbroadcastss  0x378e(%rip),%ymm12        # 6700 <_sk_callback_avx+0x284>
   .byte  196,193,124,86,196                  // vorps         %ymm12,%ymm0,%ymm0
-  .byte  196,98,125,24,37,42,56,0,0          // vbroadcastss  0x382a(%rip),%ymm12        # 68dc <_sk_callback_avx+0x288>
+  .byte  196,98,125,24,37,132,55,0,0         // vbroadcastss  0x3784(%rip),%ymm12        # 6704 <_sk_callback_avx+0x288>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,32,56,0,0          // vbroadcastss  0x3820(%rip),%ymm12        # 68e0 <_sk_callback_avx+0x28c>
+  .byte  196,98,125,24,37,122,55,0,0         // vbroadcastss  0x377a(%rip),%ymm12        # 6708 <_sk_callback_avx+0x28c>
   .byte  196,65,124,89,228                   // vmulps        %ymm12,%ymm0,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,17,56,0,0          // vbroadcastss  0x3811(%rip),%ymm12        # 68e4 <_sk_callback_avx+0x290>
+  .byte  196,98,125,24,37,107,55,0,0         // vbroadcastss  0x376b(%rip),%ymm12        # 670c <_sk_callback_avx+0x290>
   .byte  196,193,124,88,196                  // vaddps        %ymm12,%ymm0,%ymm0
-  .byte  196,98,125,24,37,7,56,0,0           // vbroadcastss  0x3807(%rip),%ymm12        # 68e8 <_sk_callback_avx+0x294>
+  .byte  196,98,125,24,37,97,55,0,0          // vbroadcastss  0x3761(%rip),%ymm12        # 6710 <_sk_callback_avx+0x294>
   .byte  197,156,94,192                      // vdivps        %ymm0,%ymm12,%ymm0
   .byte  197,164,92,192                      // vsubps        %ymm0,%ymm11,%ymm0
   .byte  197,172,89,192                      // vmulps        %ymm0,%ymm10,%ymm0
   .byte  196,99,125,8,208,1                  // vroundps      $0x1,%ymm0,%ymm10
   .byte  196,65,124,92,210                   // vsubps        %ymm10,%ymm0,%ymm10
-  .byte  196,98,125,24,29,235,55,0,0         // vbroadcastss  0x37eb(%rip),%ymm11        # 68ec <_sk_callback_avx+0x298>
+  .byte  196,98,125,24,29,69,55,0,0          // vbroadcastss  0x3745(%rip),%ymm11        # 6714 <_sk_callback_avx+0x298>
   .byte  196,193,124,88,195                  // vaddps        %ymm11,%ymm0,%ymm0
-  .byte  196,98,125,24,29,225,55,0,0         // vbroadcastss  0x37e1(%rip),%ymm11        # 68f0 <_sk_callback_avx+0x29c>
+  .byte  196,98,125,24,29,59,55,0,0          // vbroadcastss  0x373b(%rip),%ymm11        # 6718 <_sk_callback_avx+0x29c>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,124,92,195                  // vsubps        %ymm11,%ymm0,%ymm0
-  .byte  196,98,125,24,29,210,55,0,0         // vbroadcastss  0x37d2(%rip),%ymm11        # 68f4 <_sk_callback_avx+0x2a0>
+  .byte  196,98,125,24,29,44,55,0,0          // vbroadcastss  0x372c(%rip),%ymm11        # 671c <_sk_callback_avx+0x2a0>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,200,55,0,0         // vbroadcastss  0x37c8(%rip),%ymm11        # 68f8 <_sk_callback_avx+0x2a4>
+  .byte  196,98,125,24,29,34,55,0,0          // vbroadcastss  0x3722(%rip),%ymm11        # 6720 <_sk_callback_avx+0x2a4>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,124,88,194                  // vaddps        %ymm10,%ymm0,%ymm0
-  .byte  196,98,125,24,21,185,55,0,0         // vbroadcastss  0x37b9(%rip),%ymm10        # 68fc <_sk_callback_avx+0x2a8>
+  .byte  196,98,125,24,21,19,55,0,0          // vbroadcastss  0x3713(%rip),%ymm10        # 6724 <_sk_callback_avx+0x2a8>
   .byte  196,193,124,89,194                  // vmulps        %ymm10,%ymm0,%ymm0
   .byte  197,253,91,192                      // vcvtps2dq     %ymm0,%ymm0
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -18086,7 +17996,7 @@
   .byte  196,195,125,74,193,128              // vblendvps     %ymm8,%ymm9,%ymm0,%ymm0
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,124,95,192                  // vmaxps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,144,55,0,0          // vbroadcastss  0x3790(%rip),%ymm8        # 6900 <_sk_callback_avx+0x2ac>
+  .byte  196,98,125,24,5,234,54,0,0          // vbroadcastss  0x36ea(%rip),%ymm8        # 6728 <_sk_callback_avx+0x2ac>
   .byte  196,193,124,93,192                  // vminps        %ymm8,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -18108,36 +18018,36 @@
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,217                      // vcvtdq2ps     %ymm1,%ymm11
-  .byte  196,98,125,24,37,65,55,0,0          // vbroadcastss  0x3741(%rip),%ymm12        # 6904 <_sk_callback_avx+0x2b0>
+  .byte  196,98,125,24,37,155,54,0,0         // vbroadcastss  0x369b(%rip),%ymm12        # 672c <_sk_callback_avx+0x2b0>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,55,55,0,0          // vbroadcastss  0x3737(%rip),%ymm12        # 6908 <_sk_callback_avx+0x2b4>
+  .byte  196,98,125,24,37,145,54,0,0         // vbroadcastss  0x3691(%rip),%ymm12        # 6730 <_sk_callback_avx+0x2b4>
   .byte  196,193,116,84,204                  // vandps        %ymm12,%ymm1,%ymm1
-  .byte  196,98,125,24,37,45,55,0,0          // vbroadcastss  0x372d(%rip),%ymm12        # 690c <_sk_callback_avx+0x2b8>
+  .byte  196,98,125,24,37,135,54,0,0         // vbroadcastss  0x3687(%rip),%ymm12        # 6734 <_sk_callback_avx+0x2b8>
   .byte  196,193,116,86,204                  // vorps         %ymm12,%ymm1,%ymm1
-  .byte  196,98,125,24,37,35,55,0,0          // vbroadcastss  0x3723(%rip),%ymm12        # 6910 <_sk_callback_avx+0x2bc>
+  .byte  196,98,125,24,37,125,54,0,0         // vbroadcastss  0x367d(%rip),%ymm12        # 6738 <_sk_callback_avx+0x2bc>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,25,55,0,0          // vbroadcastss  0x3719(%rip),%ymm12        # 6914 <_sk_callback_avx+0x2c0>
+  .byte  196,98,125,24,37,115,54,0,0         // vbroadcastss  0x3673(%rip),%ymm12        # 673c <_sk_callback_avx+0x2c0>
   .byte  196,65,116,89,228                   // vmulps        %ymm12,%ymm1,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,10,55,0,0          // vbroadcastss  0x370a(%rip),%ymm12        # 6918 <_sk_callback_avx+0x2c4>
+  .byte  196,98,125,24,37,100,54,0,0         // vbroadcastss  0x3664(%rip),%ymm12        # 6740 <_sk_callback_avx+0x2c4>
   .byte  196,193,116,88,204                  // vaddps        %ymm12,%ymm1,%ymm1
-  .byte  196,98,125,24,37,0,55,0,0           // vbroadcastss  0x3700(%rip),%ymm12        # 691c <_sk_callback_avx+0x2c8>
+  .byte  196,98,125,24,37,90,54,0,0          // vbroadcastss  0x365a(%rip),%ymm12        # 6744 <_sk_callback_avx+0x2c8>
   .byte  197,156,94,201                      // vdivps        %ymm1,%ymm12,%ymm1
   .byte  197,164,92,201                      // vsubps        %ymm1,%ymm11,%ymm1
   .byte  197,172,89,201                      // vmulps        %ymm1,%ymm10,%ymm1
   .byte  196,99,125,8,209,1                  // vroundps      $0x1,%ymm1,%ymm10
   .byte  196,65,116,92,210                   // vsubps        %ymm10,%ymm1,%ymm10
-  .byte  196,98,125,24,29,228,54,0,0         // vbroadcastss  0x36e4(%rip),%ymm11        # 6920 <_sk_callback_avx+0x2cc>
+  .byte  196,98,125,24,29,62,54,0,0          // vbroadcastss  0x363e(%rip),%ymm11        # 6748 <_sk_callback_avx+0x2cc>
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,218,54,0,0         // vbroadcastss  0x36da(%rip),%ymm11        # 6924 <_sk_callback_avx+0x2d0>
+  .byte  196,98,125,24,29,52,54,0,0          // vbroadcastss  0x3634(%rip),%ymm11        # 674c <_sk_callback_avx+0x2d0>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,116,92,203                  // vsubps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,203,54,0,0         // vbroadcastss  0x36cb(%rip),%ymm11        # 6928 <_sk_callback_avx+0x2d4>
+  .byte  196,98,125,24,29,37,54,0,0          // vbroadcastss  0x3625(%rip),%ymm11        # 6750 <_sk_callback_avx+0x2d4>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,193,54,0,0         // vbroadcastss  0x36c1(%rip),%ymm11        # 692c <_sk_callback_avx+0x2d8>
+  .byte  196,98,125,24,29,27,54,0,0          // vbroadcastss  0x361b(%rip),%ymm11        # 6754 <_sk_callback_avx+0x2d8>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,116,88,202                  // vaddps        %ymm10,%ymm1,%ymm1
-  .byte  196,98,125,24,21,178,54,0,0         // vbroadcastss  0x36b2(%rip),%ymm10        # 6930 <_sk_callback_avx+0x2dc>
+  .byte  196,98,125,24,21,12,54,0,0          // vbroadcastss  0x360c(%rip),%ymm10        # 6758 <_sk_callback_avx+0x2dc>
   .byte  196,193,116,89,202                  // vmulps        %ymm10,%ymm1,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -18145,7 +18055,7 @@
   .byte  196,195,117,74,201,128              // vblendvps     %ymm8,%ymm9,%ymm1,%ymm1
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,116,95,200                  // vmaxps        %ymm8,%ymm1,%ymm1
-  .byte  196,98,125,24,5,137,54,0,0          // vbroadcastss  0x3689(%rip),%ymm8        # 6934 <_sk_callback_avx+0x2e0>
+  .byte  196,98,125,24,5,227,53,0,0          // vbroadcastss  0x35e3(%rip),%ymm8        # 675c <_sk_callback_avx+0x2e0>
   .byte  196,193,116,93,200                  // vminps        %ymm8,%ymm1,%ymm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -18167,36 +18077,36 @@
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,218                      // vcvtdq2ps     %ymm2,%ymm11
-  .byte  196,98,125,24,37,58,54,0,0          // vbroadcastss  0x363a(%rip),%ymm12        # 6938 <_sk_callback_avx+0x2e4>
+  .byte  196,98,125,24,37,148,53,0,0         // vbroadcastss  0x3594(%rip),%ymm12        # 6760 <_sk_callback_avx+0x2e4>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,48,54,0,0          // vbroadcastss  0x3630(%rip),%ymm12        # 693c <_sk_callback_avx+0x2e8>
+  .byte  196,98,125,24,37,138,53,0,0         // vbroadcastss  0x358a(%rip),%ymm12        # 6764 <_sk_callback_avx+0x2e8>
   .byte  196,193,108,84,212                  // vandps        %ymm12,%ymm2,%ymm2
-  .byte  196,98,125,24,37,38,54,0,0          // vbroadcastss  0x3626(%rip),%ymm12        # 6940 <_sk_callback_avx+0x2ec>
+  .byte  196,98,125,24,37,128,53,0,0         // vbroadcastss  0x3580(%rip),%ymm12        # 6768 <_sk_callback_avx+0x2ec>
   .byte  196,193,108,86,212                  // vorps         %ymm12,%ymm2,%ymm2
-  .byte  196,98,125,24,37,28,54,0,0          // vbroadcastss  0x361c(%rip),%ymm12        # 6944 <_sk_callback_avx+0x2f0>
+  .byte  196,98,125,24,37,118,53,0,0         // vbroadcastss  0x3576(%rip),%ymm12        # 676c <_sk_callback_avx+0x2f0>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,18,54,0,0          // vbroadcastss  0x3612(%rip),%ymm12        # 6948 <_sk_callback_avx+0x2f4>
+  .byte  196,98,125,24,37,108,53,0,0         // vbroadcastss  0x356c(%rip),%ymm12        # 6770 <_sk_callback_avx+0x2f4>
   .byte  196,65,108,89,228                   // vmulps        %ymm12,%ymm2,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,3,54,0,0           // vbroadcastss  0x3603(%rip),%ymm12        # 694c <_sk_callback_avx+0x2f8>
+  .byte  196,98,125,24,37,93,53,0,0          // vbroadcastss  0x355d(%rip),%ymm12        # 6774 <_sk_callback_avx+0x2f8>
   .byte  196,193,108,88,212                  // vaddps        %ymm12,%ymm2,%ymm2
-  .byte  196,98,125,24,37,249,53,0,0         // vbroadcastss  0x35f9(%rip),%ymm12        # 6950 <_sk_callback_avx+0x2fc>
+  .byte  196,98,125,24,37,83,53,0,0          // vbroadcastss  0x3553(%rip),%ymm12        # 6778 <_sk_callback_avx+0x2fc>
   .byte  197,156,94,210                      // vdivps        %ymm2,%ymm12,%ymm2
   .byte  197,164,92,210                      // vsubps        %ymm2,%ymm11,%ymm2
   .byte  197,172,89,210                      // vmulps        %ymm2,%ymm10,%ymm2
   .byte  196,99,125,8,210,1                  // vroundps      $0x1,%ymm2,%ymm10
   .byte  196,65,108,92,210                   // vsubps        %ymm10,%ymm2,%ymm10
-  .byte  196,98,125,24,29,221,53,0,0         // vbroadcastss  0x35dd(%rip),%ymm11        # 6954 <_sk_callback_avx+0x300>
+  .byte  196,98,125,24,29,55,53,0,0          // vbroadcastss  0x3537(%rip),%ymm11        # 677c <_sk_callback_avx+0x300>
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
-  .byte  196,98,125,24,29,211,53,0,0         // vbroadcastss  0x35d3(%rip),%ymm11        # 6958 <_sk_callback_avx+0x304>
+  .byte  196,98,125,24,29,45,53,0,0          // vbroadcastss  0x352d(%rip),%ymm11        # 6780 <_sk_callback_avx+0x304>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,108,92,211                  // vsubps        %ymm11,%ymm2,%ymm2
-  .byte  196,98,125,24,29,196,53,0,0         // vbroadcastss  0x35c4(%rip),%ymm11        # 695c <_sk_callback_avx+0x308>
+  .byte  196,98,125,24,29,30,53,0,0          // vbroadcastss  0x351e(%rip),%ymm11        # 6784 <_sk_callback_avx+0x308>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,186,53,0,0         // vbroadcastss  0x35ba(%rip),%ymm11        # 6960 <_sk_callback_avx+0x30c>
+  .byte  196,98,125,24,29,20,53,0,0          // vbroadcastss  0x3514(%rip),%ymm11        # 6788 <_sk_callback_avx+0x30c>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,108,88,210                  // vaddps        %ymm10,%ymm2,%ymm2
-  .byte  196,98,125,24,21,171,53,0,0         // vbroadcastss  0x35ab(%rip),%ymm10        # 6964 <_sk_callback_avx+0x310>
+  .byte  196,98,125,24,21,5,53,0,0           // vbroadcastss  0x3505(%rip),%ymm10        # 678c <_sk_callback_avx+0x310>
   .byte  196,193,108,89,210                  // vmulps        %ymm10,%ymm2,%ymm2
   .byte  197,253,91,210                      // vcvtps2dq     %ymm2,%ymm2
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -18204,7 +18114,7 @@
   .byte  196,195,109,74,209,128              // vblendvps     %ymm8,%ymm9,%ymm2,%ymm2
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,108,95,208                  // vmaxps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,130,53,0,0          // vbroadcastss  0x3582(%rip),%ymm8        # 6968 <_sk_callback_avx+0x314>
+  .byte  196,98,125,24,5,220,52,0,0          // vbroadcastss  0x34dc(%rip),%ymm8        # 6790 <_sk_callback_avx+0x314>
   .byte  196,193,108,93,208                  // vminps        %ymm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -18226,36 +18136,36 @@
   .byte  196,193,100,88,219                  // vaddps        %ymm11,%ymm3,%ymm3
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,219                      // vcvtdq2ps     %ymm3,%ymm11
-  .byte  196,98,125,24,37,51,53,0,0          // vbroadcastss  0x3533(%rip),%ymm12        # 696c <_sk_callback_avx+0x318>
+  .byte  196,98,125,24,37,141,52,0,0         // vbroadcastss  0x348d(%rip),%ymm12        # 6794 <_sk_callback_avx+0x318>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,41,53,0,0          // vbroadcastss  0x3529(%rip),%ymm12        # 6970 <_sk_callback_avx+0x31c>
+  .byte  196,98,125,24,37,131,52,0,0         // vbroadcastss  0x3483(%rip),%ymm12        # 6798 <_sk_callback_avx+0x31c>
   .byte  196,193,100,84,220                  // vandps        %ymm12,%ymm3,%ymm3
-  .byte  196,98,125,24,37,31,53,0,0          // vbroadcastss  0x351f(%rip),%ymm12        # 6974 <_sk_callback_avx+0x320>
+  .byte  196,98,125,24,37,121,52,0,0         // vbroadcastss  0x3479(%rip),%ymm12        # 679c <_sk_callback_avx+0x320>
   .byte  196,193,100,86,220                  // vorps         %ymm12,%ymm3,%ymm3
-  .byte  196,98,125,24,37,21,53,0,0          // vbroadcastss  0x3515(%rip),%ymm12        # 6978 <_sk_callback_avx+0x324>
+  .byte  196,98,125,24,37,111,52,0,0         // vbroadcastss  0x346f(%rip),%ymm12        # 67a0 <_sk_callback_avx+0x324>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,11,53,0,0          // vbroadcastss  0x350b(%rip),%ymm12        # 697c <_sk_callback_avx+0x328>
+  .byte  196,98,125,24,37,101,52,0,0         // vbroadcastss  0x3465(%rip),%ymm12        # 67a4 <_sk_callback_avx+0x328>
   .byte  196,65,100,89,228                   // vmulps        %ymm12,%ymm3,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,252,52,0,0         // vbroadcastss  0x34fc(%rip),%ymm12        # 6980 <_sk_callback_avx+0x32c>
+  .byte  196,98,125,24,37,86,52,0,0          // vbroadcastss  0x3456(%rip),%ymm12        # 67a8 <_sk_callback_avx+0x32c>
   .byte  196,193,100,88,220                  // vaddps        %ymm12,%ymm3,%ymm3
-  .byte  196,98,125,24,37,242,52,0,0         // vbroadcastss  0x34f2(%rip),%ymm12        # 6984 <_sk_callback_avx+0x330>
+  .byte  196,98,125,24,37,76,52,0,0          // vbroadcastss  0x344c(%rip),%ymm12        # 67ac <_sk_callback_avx+0x330>
   .byte  197,156,94,219                      // vdivps        %ymm3,%ymm12,%ymm3
   .byte  197,164,92,219                      // vsubps        %ymm3,%ymm11,%ymm3
   .byte  197,172,89,219                      // vmulps        %ymm3,%ymm10,%ymm3
   .byte  196,99,125,8,211,1                  // vroundps      $0x1,%ymm3,%ymm10
   .byte  196,65,100,92,210                   // vsubps        %ymm10,%ymm3,%ymm10
-  .byte  196,98,125,24,29,214,52,0,0         // vbroadcastss  0x34d6(%rip),%ymm11        # 6988 <_sk_callback_avx+0x334>
+  .byte  196,98,125,24,29,48,52,0,0          // vbroadcastss  0x3430(%rip),%ymm11        # 67b0 <_sk_callback_avx+0x334>
   .byte  196,193,100,88,219                  // vaddps        %ymm11,%ymm3,%ymm3
-  .byte  196,98,125,24,29,204,52,0,0         // vbroadcastss  0x34cc(%rip),%ymm11        # 698c <_sk_callback_avx+0x338>
+  .byte  196,98,125,24,29,38,52,0,0          // vbroadcastss  0x3426(%rip),%ymm11        # 67b4 <_sk_callback_avx+0x338>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,100,92,219                  // vsubps        %ymm11,%ymm3,%ymm3
-  .byte  196,98,125,24,29,189,52,0,0         // vbroadcastss  0x34bd(%rip),%ymm11        # 6990 <_sk_callback_avx+0x33c>
+  .byte  196,98,125,24,29,23,52,0,0          // vbroadcastss  0x3417(%rip),%ymm11        # 67b8 <_sk_callback_avx+0x33c>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,179,52,0,0         // vbroadcastss  0x34b3(%rip),%ymm11        # 6994 <_sk_callback_avx+0x340>
+  .byte  196,98,125,24,29,13,52,0,0          // vbroadcastss  0x340d(%rip),%ymm11        # 67bc <_sk_callback_avx+0x340>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,100,88,218                  // vaddps        %ymm10,%ymm3,%ymm3
-  .byte  196,98,125,24,21,164,52,0,0         // vbroadcastss  0x34a4(%rip),%ymm10        # 6998 <_sk_callback_avx+0x344>
+  .byte  196,98,125,24,21,254,51,0,0         // vbroadcastss  0x33fe(%rip),%ymm10        # 67c0 <_sk_callback_avx+0x344>
   .byte  196,193,100,89,218                  // vmulps        %ymm10,%ymm3,%ymm3
   .byte  197,253,91,219                      // vcvtps2dq     %ymm3,%ymm3
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -18263,7 +18173,7 @@
   .byte  196,195,101,74,217,128              // vblendvps     %ymm8,%ymm9,%ymm3,%ymm3
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,100,95,216                  // vmaxps        %ymm8,%ymm3,%ymm3
-  .byte  196,98,125,24,5,123,52,0,0          // vbroadcastss  0x347b(%rip),%ymm8        # 699c <_sk_callback_avx+0x348>
+  .byte  196,98,125,24,5,213,51,0,0          // vbroadcastss  0x33d5(%rip),%ymm8        # 67c4 <_sk_callback_avx+0x348>
   .byte  196,193,100,93,216                  // vminps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -18272,31 +18182,31 @@
 .globl _sk_lab_to_xyz_avx
 FUNCTION(_sk_lab_to_xyz_avx)
 _sk_lab_to_xyz_avx:
-  .byte  196,98,125,24,5,109,52,0,0          // vbroadcastss  0x346d(%rip),%ymm8        # 69a0 <_sk_callback_avx+0x34c>
+  .byte  196,98,125,24,5,199,51,0,0          // vbroadcastss  0x33c7(%rip),%ymm8        # 67c8 <_sk_callback_avx+0x34c>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,99,52,0,0           // vbroadcastss  0x3463(%rip),%ymm8        # 69a4 <_sk_callback_avx+0x350>
+  .byte  196,98,125,24,5,189,51,0,0          // vbroadcastss  0x33bd(%rip),%ymm8        # 67cc <_sk_callback_avx+0x350>
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
-  .byte  196,98,125,24,13,89,52,0,0          // vbroadcastss  0x3459(%rip),%ymm9        # 69a8 <_sk_callback_avx+0x354>
+  .byte  196,98,125,24,13,179,51,0,0         // vbroadcastss  0x33b3(%rip),%ymm9        # 67d0 <_sk_callback_avx+0x354>
   .byte  196,193,116,88,201                  // vaddps        %ymm9,%ymm1,%ymm1
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  196,193,108,88,209                  // vaddps        %ymm9,%ymm2,%ymm2
-  .byte  196,98,125,24,5,69,52,0,0           // vbroadcastss  0x3445(%rip),%ymm8        # 69ac <_sk_callback_avx+0x358>
+  .byte  196,98,125,24,5,159,51,0,0          // vbroadcastss  0x339f(%rip),%ymm8        # 67d4 <_sk_callback_avx+0x358>
   .byte  196,193,124,88,192                  // vaddps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,59,52,0,0           // vbroadcastss  0x343b(%rip),%ymm8        # 69b0 <_sk_callback_avx+0x35c>
+  .byte  196,98,125,24,5,149,51,0,0          // vbroadcastss  0x3395(%rip),%ymm8        # 67d8 <_sk_callback_avx+0x35c>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,49,52,0,0           // vbroadcastss  0x3431(%rip),%ymm8        # 69b4 <_sk_callback_avx+0x360>
+  .byte  196,98,125,24,5,139,51,0,0          // vbroadcastss  0x338b(%rip),%ymm8        # 67dc <_sk_callback_avx+0x360>
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
   .byte  197,252,88,201                      // vaddps        %ymm1,%ymm0,%ymm1
-  .byte  196,98,125,24,5,35,52,0,0           // vbroadcastss  0x3423(%rip),%ymm8        # 69b8 <_sk_callback_avx+0x364>
+  .byte  196,98,125,24,5,125,51,0,0          // vbroadcastss  0x337d(%rip),%ymm8        # 67e0 <_sk_callback_avx+0x364>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  197,252,92,210                      // vsubps        %ymm2,%ymm0,%ymm2
   .byte  197,116,89,193                      // vmulps        %ymm1,%ymm1,%ymm8
   .byte  196,65,116,89,192                   // vmulps        %ymm8,%ymm1,%ymm8
-  .byte  196,98,125,24,13,12,52,0,0          // vbroadcastss  0x340c(%rip),%ymm9        # 69bc <_sk_callback_avx+0x368>
+  .byte  196,98,125,24,13,102,51,0,0         // vbroadcastss  0x3366(%rip),%ymm9        # 67e4 <_sk_callback_avx+0x368>
   .byte  196,65,52,194,208,1                 // vcmpltps      %ymm8,%ymm9,%ymm10
-  .byte  196,98,125,24,29,1,52,0,0           // vbroadcastss  0x3401(%rip),%ymm11        # 69c0 <_sk_callback_avx+0x36c>
+  .byte  196,98,125,24,29,91,51,0,0          // vbroadcastss  0x335b(%rip),%ymm11        # 67e8 <_sk_callback_avx+0x36c>
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,37,247,51,0,0         // vbroadcastss  0x33f7(%rip),%ymm12        # 69c4 <_sk_callback_avx+0x370>
+  .byte  196,98,125,24,37,81,51,0,0          // vbroadcastss  0x3351(%rip),%ymm12        # 67ec <_sk_callback_avx+0x370>
   .byte  196,193,116,89,204                  // vmulps        %ymm12,%ymm1,%ymm1
   .byte  196,67,117,74,192,160               // vblendvps     %ymm10,%ymm8,%ymm1,%ymm8
   .byte  197,252,89,200                      // vmulps        %ymm0,%ymm0,%ymm1
@@ -18311,9 +18221,9 @@
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
   .byte  196,193,108,89,212                  // vmulps        %ymm12,%ymm2,%ymm2
   .byte  196,227,109,74,208,144              // vblendvps     %ymm9,%ymm0,%ymm2,%ymm2
-  .byte  196,226,125,24,5,173,51,0,0         // vbroadcastss  0x33ad(%rip),%ymm0        # 69c8 <_sk_callback_avx+0x374>
+  .byte  196,226,125,24,5,7,51,0,0           // vbroadcastss  0x3307(%rip),%ymm0        # 67f0 <_sk_callback_avx+0x374>
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
-  .byte  196,98,125,24,5,164,51,0,0          // vbroadcastss  0x33a4(%rip),%ymm8        # 69cc <_sk_callback_avx+0x378>
+  .byte  196,98,125,24,5,254,50,0,0          // vbroadcastss  0x32fe(%rip),%ymm8        # 67f4 <_sk_callback_avx+0x378>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -18327,14 +18237,14 @@
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,1,248                            // add           %rdi,%rax
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,62                              // jne           367f <_sk_load_a8_avx+0x4e>
+  .byte  117,62                              // jne           354d <_sk_load_a8_avx+0x4e>
   .byte  197,250,126,0                       // vmovq         (%rax),%xmm0
   .byte  196,226,121,49,200                  // vpmovzxbd     %xmm0,%xmm1
   .byte  196,227,121,4,192,229               // vpermilps     $0xe5,%xmm0,%xmm0
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,117,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,104,51,0,0        // vbroadcastss  0x3368(%rip),%ymm1        # 69d0 <_sk_callback_avx+0x37c>
+  .byte  196,226,125,24,13,194,50,0,0        // vbroadcastss  0x32c2(%rip),%ymm1        # 67f8 <_sk_callback_avx+0x37c>
   .byte  197,252,89,217                      // vmulps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,87,192                      // vxorps        %ymm0,%ymm0,%ymm0
@@ -18351,9 +18261,9 @@
   .byte  77,9,217                            // or            %r11,%r9
   .byte  72,131,193,8                        // add           $0x8,%rcx
   .byte  73,255,202                          // dec           %r10
-  .byte  117,234                             // jne           3687 <_sk_load_a8_avx+0x56>
+  .byte  117,234                             // jne           3555 <_sk_load_a8_avx+0x56>
   .byte  196,193,249,110,193                 // vmovq         %r9,%xmm0
-  .byte  235,161                             // jmp           3645 <_sk_load_a8_avx+0x14>
+  .byte  235,161                             // jmp           3513 <_sk_load_a8_avx+0x14>
 
 HIDDEN _sk_gather_a8_avx
 .globl _sk_gather_a8_avx
@@ -18403,7 +18313,7 @@
   .byte  196,226,121,49,201                  // vpmovzxbd     %xmm1,%xmm1
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,93,50,0,0         // vbroadcastss  0x325d(%rip),%ymm1        # 69d4 <_sk_callback_avx+0x380>
+  .byte  196,226,125,24,13,183,49,0,0        // vbroadcastss  0x31b7(%rip),%ymm1        # 67fc <_sk_callback_avx+0x380>
   .byte  197,252,89,217                      // vmulps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,87,192                      // vxorps        %ymm0,%ymm0,%ymm0
@@ -18421,14 +18331,14 @@
 _sk_store_a8_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,56,50,0,0           // vbroadcastss  0x3238(%rip),%ymm8        # 69d8 <_sk_callback_avx+0x384>
+  .byte  196,98,125,24,5,146,49,0,0          // vbroadcastss  0x3192(%rip),%ymm8        # 6800 <_sk_callback_avx+0x384>
   .byte  196,65,100,89,192                   // vmulps        %ymm8,%ymm3,%ymm8
   .byte  196,65,125,91,192                   // vcvtps2dq     %ymm8,%ymm8
   .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
   .byte  196,66,57,43,193                    // vpackusdw     %xmm9,%xmm8,%xmm8
   .byte  196,65,57,103,192                   // vpackuswb     %xmm8,%xmm8,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,10                              // jne           37c9 <_sk_store_a8_avx+0x37>
+  .byte  117,10                              // jne           3697 <_sk_store_a8_avx+0x37>
   .byte  196,65,123,17,4,58                  // vmovsd        %xmm8,(%r10,%rdi,1)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -18436,10 +18346,10 @@
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,236                             // ja            37c5 <_sk_store_a8_avx+0x33>
+  .byte  119,236                             // ja            3693 <_sk_store_a8_avx+0x33>
   .byte  196,66,121,48,192                   // vpmovzxbw     %xmm8,%xmm8
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,67,0,0,0                  // lea           0x43(%rip),%r9        # 382c <_sk_store_a8_avx+0x9a>
+  .byte  76,141,13,69,0,0,0                  // lea           0x45(%rip),%r9        # 36fc <_sk_store_a8_avx+0x9c>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -18450,27 +18360,28 @@
   .byte  196,67,121,20,68,58,2,4             // vpextrb       $0x4,%xmm8,0x2(%r10,%rdi,1)
   .byte  196,67,121,20,68,58,1,2             // vpextrb       $0x2,%xmm8,0x1(%r10,%rdi,1)
   .byte  196,67,121,20,4,58,0                // vpextrb       $0x0,%xmm8,(%r10,%rdi,1)
-  .byte  235,154                             // jmp           37c5 <_sk_store_a8_avx+0x33>
-  .byte  144                                 // nop
-  .byte  246,255                             // idiv          %bh
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  238                                 // out           %al,(%dx)
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255,230                             // jmpq          *%rsi
+  .byte  235,154                             // jmp           3693 <_sk_store_a8_avx+0x33>
+  .byte  15,31,0                             // nopl          (%rax)
+  .byte  244                                 // hlt
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  222,255                             // fdivrp        %st,%st(7)
-  .byte  255                                 // (bad)
-  .byte  255,214                             // callq         *%rsi
+  .byte  236                                 // in            (%dx),%al
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,206                             // dec           %esi
+  .byte  255,228                             // jmpq          *%rsp
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,198                             // inc           %esi
+  .byte  255                                 // (bad)
+  .byte  220,255                             // fdivr         %st,%st(7)
+  .byte  255                                 // (bad)
+  .byte  255,212                             // callq         *%rsp
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255,204                             // dec           %esp
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255,196                             // inc           %esp
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // .byte         0xff
@@ -18484,17 +18395,17 @@
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,1,248                            // add           %rdi,%rax
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,67                              // jne           389b <_sk_load_g8_avx+0x53>
+  .byte  117,67                              // jne           376b <_sk_load_g8_avx+0x53>
   .byte  197,250,126,0                       // vmovq         (%rax),%xmm0
   .byte  196,226,121,49,200                  // vpmovzxbd     %xmm0,%xmm1
   .byte  196,227,121,4,192,229               // vpermilps     $0xe5,%xmm0,%xmm0
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,117,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,93,49,0,0         // vbroadcastss  0x315d(%rip),%ymm1        # 69dc <_sk_callback_avx+0x388>
+  .byte  196,226,125,24,13,181,48,0,0        // vbroadcastss  0x30b5(%rip),%ymm1        # 6804 <_sk_callback_avx+0x388>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,82,49,0,0         // vbroadcastss  0x3152(%rip),%ymm3        # 69e0 <_sk_callback_avx+0x38c>
+  .byte  196,226,125,24,29,170,48,0,0        // vbroadcastss  0x30aa(%rip),%ymm3        # 6808 <_sk_callback_avx+0x38c>
   .byte  76,137,193                          // mov           %r8,%rcx
   .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
@@ -18508,9 +18419,9 @@
   .byte  77,9,217                            // or            %r11,%r9
   .byte  72,131,193,8                        // add           $0x8,%rcx
   .byte  73,255,202                          // dec           %r10
-  .byte  117,234                             // jne           38a3 <_sk_load_g8_avx+0x5b>
+  .byte  117,234                             // jne           3773 <_sk_load_g8_avx+0x5b>
   .byte  196,193,249,110,193                 // vmovq         %r9,%xmm0
-  .byte  235,156                             // jmp           385c <_sk_load_g8_avx+0x14>
+  .byte  235,156                             // jmp           372c <_sk_load_g8_avx+0x14>
 
 HIDDEN _sk_gather_g8_avx
 .globl _sk_gather_g8_avx
@@ -18560,10 +18471,10 @@
   .byte  196,226,121,49,201                  // vpmovzxbd     %xmm1,%xmm1
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,81,48,0,0         // vbroadcastss  0x3051(%rip),%ymm1        # 69e4 <_sk_callback_avx+0x390>
+  .byte  196,226,125,24,13,169,47,0,0        // vbroadcastss  0x2fa9(%rip),%ymm1        # 680c <_sk_callback_avx+0x390>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,70,48,0,0         // vbroadcastss  0x3046(%rip),%ymm3        # 69e8 <_sk_callback_avx+0x394>
+  .byte  196,226,125,24,29,158,47,0,0        // vbroadcastss  0x2f9e(%rip),%ymm3        # 6810 <_sk_callback_avx+0x394>
   .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
   .byte  91                                  // pop           %rbx
@@ -18579,9 +18490,9 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  73,137,192                          // mov           %rax,%r8
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  116,5                               // je            39c2 <_sk_gather_i8_avx+0xf>
+  .byte  116,5                               // je            3892 <_sk_gather_i8_avx+0xf>
   .byte  76,137,192                          // mov           %r8,%rax
-  .byte  235,2                               // jmp           39c4 <_sk_gather_i8_avx+0x11>
+  .byte  235,2                               // jmp           3894 <_sk_gather_i8_avx+0x11>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  65,87                               // push          %r15
   .byte  65,86                               // push          %r14
@@ -18643,10 +18554,10 @@
   .byte  196,163,121,34,4,163,2              // vpinsrd       $0x2,(%rbx,%r12,4),%xmm0,%xmm0
   .byte  196,163,121,34,28,19,3              // vpinsrd       $0x3,(%rbx,%r10,1),%xmm0,%xmm3
   .byte  196,227,61,24,195,1                 // vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  .byte  197,124,40,21,18,49,0,0             // vmovaps       0x3112(%rip),%ymm10        # 6c00 <_sk_callback_avx+0x5ac>
+  .byte  197,124,40,21,2,49,0,0              // vmovaps       0x3102(%rip),%ymm10        # 6ac0 <_sk_callback_avx+0x644>
   .byte  196,193,124,84,194                  // vandps        %ymm10,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,236,46,0,0         // vbroadcastss  0x2eec(%rip),%ymm9        # 69ec <_sk_callback_avx+0x398>
+  .byte  196,98,125,24,13,68,46,0,0          // vbroadcastss  0x2e44(%rip),%ymm9        # 6814 <_sk_callback_avx+0x398>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  196,193,113,114,208,8               // vpsrld        $0x8,%xmm8,%xmm1
   .byte  197,233,114,211,8                   // vpsrld        $0x8,%xmm3,%xmm2
@@ -18680,38 +18591,38 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,128,0,0,0                    // jne           3bf8 <_sk_load_565_avx+0x8e>
+  .byte  15,133,128,0,0,0                    // jne           3ac8 <_sk_load_565_avx+0x8e>
   .byte  196,193,122,111,4,122               // vmovdqu       (%r10,%rdi,2),%xmm0
   .byte  197,241,239,201                     // vpxor         %xmm1,%xmm1,%xmm1
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,209,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  .byte  196,226,125,24,5,86,46,0,0          // vbroadcastss  0x2e56(%rip),%ymm0        # 69f0 <_sk_callback_avx+0x39c>
+  .byte  196,226,125,24,5,174,45,0,0         // vbroadcastss  0x2dae(%rip),%ymm0        # 6818 <_sk_callback_avx+0x39c>
   .byte  197,236,84,192                      // vandps        %ymm0,%ymm2,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,73,46,0,0         // vbroadcastss  0x2e49(%rip),%ymm1        # 69f4 <_sk_callback_avx+0x3a0>
+  .byte  196,226,125,24,13,161,45,0,0        // vbroadcastss  0x2da1(%rip),%ymm1        # 681c <_sk_callback_avx+0x3a0>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,64,46,0,0         // vbroadcastss  0x2e40(%rip),%ymm1        # 69f8 <_sk_callback_avx+0x3a4>
+  .byte  196,226,125,24,13,152,45,0,0        // vbroadcastss  0x2d98(%rip),%ymm1        # 6820 <_sk_callback_avx+0x3a4>
   .byte  197,236,84,201                      // vandps        %ymm1,%ymm2,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,29,51,46,0,0         // vbroadcastss  0x2e33(%rip),%ymm3        # 69fc <_sk_callback_avx+0x3a8>
+  .byte  196,226,125,24,29,139,45,0,0        // vbroadcastss  0x2d8b(%rip),%ymm3        # 6824 <_sk_callback_avx+0x3a8>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
-  .byte  196,226,125,24,29,42,46,0,0         // vbroadcastss  0x2e2a(%rip),%ymm3        # 6a00 <_sk_callback_avx+0x3ac>
+  .byte  196,226,125,24,29,130,45,0,0        // vbroadcastss  0x2d82(%rip),%ymm3        # 6828 <_sk_callback_avx+0x3ac>
   .byte  197,236,84,211                      // vandps        %ymm3,%ymm2,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,226,125,24,29,29,46,0,0         // vbroadcastss  0x2e1d(%rip),%ymm3        # 6a04 <_sk_callback_avx+0x3b0>
+  .byte  196,226,125,24,29,117,45,0,0        // vbroadcastss  0x2d75(%rip),%ymm3        # 682c <_sk_callback_avx+0x3b0>
   .byte  197,236,89,211                      // vmulps        %ymm3,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,18,46,0,0         // vbroadcastss  0x2e12(%rip),%ymm3        # 6a08 <_sk_callback_avx+0x3b4>
+  .byte  196,226,125,24,29,106,45,0,0        // vbroadcastss  0x2d6a(%rip),%ymm3        # 6830 <_sk_callback_avx+0x3b4>
   .byte  255,224                             // jmpq          *%rax
   .byte  65,137,200                          // mov           %ecx,%r8d
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  197,249,239,192                     // vpxor         %xmm0,%xmm0,%xmm0
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  15,135,110,255,255,255              // ja            3b7e <_sk_load_565_avx+0x14>
+  .byte  15,135,110,255,255,255              // ja            3a4e <_sk_load_565_avx+0x14>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,73,0,0,0                  // lea           0x49(%rip),%r9        # 3c64 <_sk_load_565_avx+0xfa>
+  .byte  76,141,13,73,0,0,0                  // lea           0x49(%rip),%r9        # 3b34 <_sk_load_565_avx+0xfa>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -18723,7 +18634,7 @@
   .byte  196,193,121,196,68,122,4,2          // vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,68,122,2,1          // vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,4,122,0             // vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  .byte  233,26,255,255,255                  // jmpq          3b7e <_sk_load_565_avx+0x14>
+  .byte  233,26,255,255,255                  // jmpq          3a4e <_sk_load_565_avx+0x14>
   .byte  244                                 // hlt
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
@@ -18801,23 +18712,23 @@
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,209,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  .byte  196,226,125,24,5,178,44,0,0         // vbroadcastss  0x2cb2(%rip),%ymm0        # 6a0c <_sk_callback_avx+0x3b8>
+  .byte  196,226,125,24,5,10,44,0,0          // vbroadcastss  0x2c0a(%rip),%ymm0        # 6834 <_sk_callback_avx+0x3b8>
   .byte  197,236,84,192                      // vandps        %ymm0,%ymm2,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,165,44,0,0        // vbroadcastss  0x2ca5(%rip),%ymm1        # 6a10 <_sk_callback_avx+0x3bc>
+  .byte  196,226,125,24,13,253,43,0,0        // vbroadcastss  0x2bfd(%rip),%ymm1        # 6838 <_sk_callback_avx+0x3bc>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,156,44,0,0        // vbroadcastss  0x2c9c(%rip),%ymm1        # 6a14 <_sk_callback_avx+0x3c0>
+  .byte  196,226,125,24,13,244,43,0,0        // vbroadcastss  0x2bf4(%rip),%ymm1        # 683c <_sk_callback_avx+0x3c0>
   .byte  197,236,84,201                      // vandps        %ymm1,%ymm2,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,29,143,44,0,0        // vbroadcastss  0x2c8f(%rip),%ymm3        # 6a18 <_sk_callback_avx+0x3c4>
+  .byte  196,226,125,24,29,231,43,0,0        // vbroadcastss  0x2be7(%rip),%ymm3        # 6840 <_sk_callback_avx+0x3c4>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
-  .byte  196,226,125,24,29,134,44,0,0        // vbroadcastss  0x2c86(%rip),%ymm3        # 6a1c <_sk_callback_avx+0x3c8>
+  .byte  196,226,125,24,29,222,43,0,0        // vbroadcastss  0x2bde(%rip),%ymm3        # 6844 <_sk_callback_avx+0x3c8>
   .byte  197,236,84,211                      // vandps        %ymm3,%ymm2,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,226,125,24,29,121,44,0,0        // vbroadcastss  0x2c79(%rip),%ymm3        # 6a20 <_sk_callback_avx+0x3cc>
+  .byte  196,226,125,24,29,209,43,0,0        // vbroadcastss  0x2bd1(%rip),%ymm3        # 6848 <_sk_callback_avx+0x3cc>
   .byte  197,236,89,211                      // vmulps        %ymm3,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,110,44,0,0        // vbroadcastss  0x2c6e(%rip),%ymm3        # 6a24 <_sk_callback_avx+0x3d0>
+  .byte  196,226,125,24,29,198,43,0,0        // vbroadcastss  0x2bc6(%rip),%ymm3        # 684c <_sk_callback_avx+0x3d0>
   .byte  91                                  // pop           %rbx
   .byte  65,92                               // pop           %r12
   .byte  65,94                               // pop           %r14
@@ -18831,14 +18742,14 @@
 _sk_store_565_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,90,44,0,0           // vbroadcastss  0x2c5a(%rip),%ymm8        # 6a28 <_sk_callback_avx+0x3d4>
+  .byte  196,98,125,24,5,178,43,0,0          // vbroadcastss  0x2bb2(%rip),%ymm8        # 6850 <_sk_callback_avx+0x3d4>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,193,41,114,241,11               // vpslld        $0xb,%xmm9,%xmm10
   .byte  196,67,125,25,201,1                 // vextractf128  $0x1,%ymm9,%xmm9
   .byte  196,193,49,114,241,11               // vpslld        $0xb,%xmm9,%xmm9
   .byte  196,67,45,24,201,1                  // vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
-  .byte  196,98,125,24,21,51,44,0,0          // vbroadcastss  0x2c33(%rip),%ymm10        # 6a2c <_sk_callback_avx+0x3d8>
+  .byte  196,98,125,24,21,139,43,0,0         // vbroadcastss  0x2b8b(%rip),%ymm10        # 6854 <_sk_callback_avx+0x3d8>
   .byte  196,65,116,89,210                   // vmulps        %ymm10,%ymm1,%ymm10
   .byte  196,65,125,91,210                   // vcvtps2dq     %ymm10,%ymm10
   .byte  196,193,33,114,242,5                // vpslld        $0x5,%xmm10,%xmm11
@@ -18852,7 +18763,7 @@
   .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
   .byte  196,66,57,43,193                    // vpackusdw     %xmm9,%xmm8,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,10                              // jne           3e49 <_sk_store_565_avx+0x89>
+  .byte  117,10                              // jne           3d19 <_sk_store_565_avx+0x89>
   .byte  196,65,122,127,4,122                // vmovdqu       %xmm8,(%r10,%rdi,2)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -18860,9 +18771,9 @@
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,236                             // ja            3e45 <_sk_store_565_avx+0x85>
+  .byte  119,236                             // ja            3d15 <_sk_store_565_avx+0x85>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,68,0,0,0                  // lea           0x44(%rip),%r9        # 3ea8 <_sk_store_565_avx+0xe8>
+  .byte  76,141,13,68,0,0,0                  // lea           0x44(%rip),%r9        # 3d78 <_sk_store_565_avx+0xe8>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -18873,7 +18784,7 @@
   .byte  196,67,121,21,68,122,4,2            // vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   .byte  196,67,121,21,68,122,2,1            // vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   .byte  196,67,121,21,4,122,0               // vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  .byte  235,159                             // jmp           3e45 <_sk_store_565_avx+0x85>
+  .byte  235,159                             // jmp           3d15 <_sk_store_565_avx+0x85>
   .byte  102,144                             // xchg          %ax,%ax
   .byte  245                                 // cmc
   .byte  255                                 // (bad)
@@ -18906,31 +18817,31 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,152,0,0,0                    // jne           3f6a <_sk_load_4444_avx+0xa6>
+  .byte  15,133,152,0,0,0                    // jne           3e3a <_sk_load_4444_avx+0xa6>
   .byte  196,193,122,111,4,122               // vmovdqu       (%r10,%rdi,2),%xmm0
   .byte  197,241,239,201                     // vpxor         %xmm1,%xmm1,%xmm1
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,217,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  .byte  196,226,125,24,5,60,43,0,0          // vbroadcastss  0x2b3c(%rip),%ymm0        # 6a30 <_sk_callback_avx+0x3dc>
+  .byte  196,226,125,24,5,148,42,0,0         // vbroadcastss  0x2a94(%rip),%ymm0        # 6858 <_sk_callback_avx+0x3dc>
   .byte  197,228,84,192                      // vandps        %ymm0,%ymm3,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,47,43,0,0         // vbroadcastss  0x2b2f(%rip),%ymm1        # 6a34 <_sk_callback_avx+0x3e0>
+  .byte  196,226,125,24,13,135,42,0,0        // vbroadcastss  0x2a87(%rip),%ymm1        # 685c <_sk_callback_avx+0x3e0>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,38,43,0,0         // vbroadcastss  0x2b26(%rip),%ymm1        # 6a38 <_sk_callback_avx+0x3e4>
+  .byte  196,226,125,24,13,126,42,0,0        // vbroadcastss  0x2a7e(%rip),%ymm1        # 6860 <_sk_callback_avx+0x3e4>
   .byte  197,228,84,201                      // vandps        %ymm1,%ymm3,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,21,25,43,0,0         // vbroadcastss  0x2b19(%rip),%ymm2        # 6a3c <_sk_callback_avx+0x3e8>
+  .byte  196,226,125,24,21,113,42,0,0        // vbroadcastss  0x2a71(%rip),%ymm2        # 6864 <_sk_callback_avx+0x3e8>
   .byte  197,244,89,202                      // vmulps        %ymm2,%ymm1,%ymm1
-  .byte  196,226,125,24,21,16,43,0,0         // vbroadcastss  0x2b10(%rip),%ymm2        # 6a40 <_sk_callback_avx+0x3ec>
+  .byte  196,226,125,24,21,104,42,0,0        // vbroadcastss  0x2a68(%rip),%ymm2        # 6868 <_sk_callback_avx+0x3ec>
   .byte  197,228,84,210                      // vandps        %ymm2,%ymm3,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,98,125,24,5,3,43,0,0            // vbroadcastss  0x2b03(%rip),%ymm8        # 6a44 <_sk_callback_avx+0x3f0>
+  .byte  196,98,125,24,5,91,42,0,0           // vbroadcastss  0x2a5b(%rip),%ymm8        # 686c <_sk_callback_avx+0x3f0>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,249,42,0,0          // vbroadcastss  0x2af9(%rip),%ymm8        # 6a48 <_sk_callback_avx+0x3f4>
+  .byte  196,98,125,24,5,81,42,0,0           // vbroadcastss  0x2a51(%rip),%ymm8        # 6870 <_sk_callback_avx+0x3f4>
   .byte  196,193,100,84,216                  // vandps        %ymm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,235,42,0,0          // vbroadcastss  0x2aeb(%rip),%ymm8        # 6a4c <_sk_callback_avx+0x3f8>
+  .byte  196,98,125,24,5,67,42,0,0           // vbroadcastss  0x2a43(%rip),%ymm8        # 6874 <_sk_callback_avx+0x3f8>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -18939,9 +18850,9 @@
   .byte  197,249,239,192                     // vpxor         %xmm0,%xmm0,%xmm0
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  15,135,86,255,255,255               // ja            3ed8 <_sk_load_4444_avx+0x14>
+  .byte  15,135,86,255,255,255               // ja            3da8 <_sk_load_4444_avx+0x14>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,75,0,0,0                  // lea           0x4b(%rip),%r9        # 3fd8 <_sk_load_4444_avx+0x114>
+  .byte  76,141,13,75,0,0,0                  // lea           0x4b(%rip),%r9        # 3ea8 <_sk_load_4444_avx+0x114>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -18953,7 +18864,7 @@
   .byte  196,193,121,196,68,122,4,2          // vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,68,122,2,1          // vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,4,122,0             // vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  .byte  233,2,255,255,255                   // jmpq          3ed8 <_sk_load_4444_avx+0x14>
+  .byte  233,2,255,255,255                   // jmpq          3da8 <_sk_load_4444_avx+0x14>
   .byte  102,144                             // xchg          %ax,%ax
   .byte  242,255                             // repnz         (bad)
   .byte  255                                 // (bad)
@@ -19032,25 +18943,25 @@
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,217,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  .byte  196,226,125,24,5,130,41,0,0         // vbroadcastss  0x2982(%rip),%ymm0        # 6a50 <_sk_callback_avx+0x3fc>
+  .byte  196,226,125,24,5,218,40,0,0         // vbroadcastss  0x28da(%rip),%ymm0        # 6878 <_sk_callback_avx+0x3fc>
   .byte  197,228,84,192                      // vandps        %ymm0,%ymm3,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,117,41,0,0        // vbroadcastss  0x2975(%rip),%ymm1        # 6a54 <_sk_callback_avx+0x400>
+  .byte  196,226,125,24,13,205,40,0,0        // vbroadcastss  0x28cd(%rip),%ymm1        # 687c <_sk_callback_avx+0x400>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,108,41,0,0        // vbroadcastss  0x296c(%rip),%ymm1        # 6a58 <_sk_callback_avx+0x404>
+  .byte  196,226,125,24,13,196,40,0,0        // vbroadcastss  0x28c4(%rip),%ymm1        # 6880 <_sk_callback_avx+0x404>
   .byte  197,228,84,201                      // vandps        %ymm1,%ymm3,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,21,95,41,0,0         // vbroadcastss  0x295f(%rip),%ymm2        # 6a5c <_sk_callback_avx+0x408>
+  .byte  196,226,125,24,21,183,40,0,0        // vbroadcastss  0x28b7(%rip),%ymm2        # 6884 <_sk_callback_avx+0x408>
   .byte  197,244,89,202                      // vmulps        %ymm2,%ymm1,%ymm1
-  .byte  196,226,125,24,21,86,41,0,0         // vbroadcastss  0x2956(%rip),%ymm2        # 6a60 <_sk_callback_avx+0x40c>
+  .byte  196,226,125,24,21,174,40,0,0        // vbroadcastss  0x28ae(%rip),%ymm2        # 6888 <_sk_callback_avx+0x40c>
   .byte  197,228,84,210                      // vandps        %ymm2,%ymm3,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,98,125,24,5,73,41,0,0           // vbroadcastss  0x2949(%rip),%ymm8        # 6a64 <_sk_callback_avx+0x410>
+  .byte  196,98,125,24,5,161,40,0,0          // vbroadcastss  0x28a1(%rip),%ymm8        # 688c <_sk_callback_avx+0x410>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,63,41,0,0           // vbroadcastss  0x293f(%rip),%ymm8        # 6a68 <_sk_callback_avx+0x414>
+  .byte  196,98,125,24,5,151,40,0,0          // vbroadcastss  0x2897(%rip),%ymm8        # 6890 <_sk_callback_avx+0x414>
   .byte  196,193,100,84,216                  // vandps        %ymm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,49,41,0,0           // vbroadcastss  0x2931(%rip),%ymm8        # 6a6c <_sk_callback_avx+0x418>
+  .byte  196,98,125,24,5,137,40,0,0          // vbroadcastss  0x2889(%rip),%ymm8        # 6894 <_sk_callback_avx+0x418>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  91                                  // pop           %rbx
@@ -19066,7 +18977,7 @@
 _sk_store_4444_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,22,41,0,0           // vbroadcastss  0x2916(%rip),%ymm8        # 6a70 <_sk_callback_avx+0x41c>
+  .byte  196,98,125,24,5,110,40,0,0          // vbroadcastss  0x286e(%rip),%ymm8        # 6898 <_sk_callback_avx+0x41c>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,193,41,114,241,12               // vpslld        $0xc,%xmm9,%xmm10
@@ -19093,7 +19004,7 @@
   .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
   .byte  196,66,57,43,193                    // vpackusdw     %xmm9,%xmm8,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,10                              // jne           41f3 <_sk_store_4444_avx+0xa7>
+  .byte  117,10                              // jne           40c3 <_sk_store_4444_avx+0xa7>
   .byte  196,65,122,127,4,122                // vmovdqu       %xmm8,(%r10,%rdi,2)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -19101,9 +19012,9 @@
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,236                             // ja            41ef <_sk_store_4444_avx+0xa3>
+  .byte  119,236                             // ja            40bf <_sk_store_4444_avx+0xa3>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,66,0,0,0                  // lea           0x42(%rip),%r9        # 4250 <_sk_store_4444_avx+0x104>
+  .byte  76,141,13,66,0,0,0                  // lea           0x42(%rip),%r9        # 4120 <_sk_store_4444_avx+0x104>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -19114,7 +19025,7 @@
   .byte  196,67,121,21,68,122,4,2            // vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   .byte  196,67,121,21,68,122,2,1            // vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   .byte  196,67,121,21,4,122,0               // vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  .byte  235,159                             // jmp           41ef <_sk_store_4444_avx+0xa3>
+  .byte  235,159                             // jmp           40bf <_sk_store_4444_avx+0xa3>
   .byte  247,255                             // idiv          %edi
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
@@ -19142,87 +19053,53 @@
 .globl _sk_load_8888_avx
 FUNCTION(_sk_load_8888_avx)
 _sk_load_8888_avx:
+  .byte  73,137,200                          // mov           %rcx,%r8
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,135,0,0,0                    // jne           4301 <_sk_load_8888_avx+0x95>
-  .byte  196,65,124,16,12,186                // vmovups       (%r10,%rdi,4),%ymm9
-  .byte  197,124,40,21,152,41,0,0            // vmovaps       0x2998(%rip),%ymm10        # 6c20 <_sk_callback_avx+0x5cc>
-  .byte  196,193,52,84,194                   // vandps        %ymm10,%ymm9,%ymm0
+  .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
+  .byte  76,3,8                              // add           (%rax),%r9
+  .byte  77,133,192                          // test          %r8,%r8
+  .byte  15,133,137,0,0,0                    // jne           41de <_sk_load_8888_avx+0xa2>
+  .byte  196,193,124,16,25                   // vmovups       (%r9),%ymm3
+  .byte  197,124,40,21,126,41,0,0            // vmovaps       0x297e(%rip),%ymm10        # 6ae0 <_sk_callback_avx+0x664>
+  .byte  196,193,100,84,194                  // vandps        %ymm10,%ymm3,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,5,218,39,0,0          // vbroadcastss  0x27da(%rip),%ymm8        # 6a74 <_sk_callback_avx+0x420>
+  .byte  196,98,125,24,5,40,39,0,0           // vbroadcastss  0x2728(%rip),%ymm8        # 689c <_sk_callback_avx+0x420>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,193,113,114,209,8               // vpsrld        $0x8,%xmm9,%xmm1
-  .byte  196,99,125,25,203,1                 // vextractf128  $0x1,%ymm9,%xmm3
-  .byte  197,233,114,211,8                   // vpsrld        $0x8,%xmm3,%xmm2
+  .byte  197,241,114,211,8                   // vpsrld        $0x8,%xmm3,%xmm1
+  .byte  196,195,125,25,217,1                // vextractf128  $0x1,%ymm3,%xmm9
+  .byte  196,193,105,114,209,8               // vpsrld        $0x8,%xmm9,%xmm2
   .byte  196,227,117,24,202,1                // vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   .byte  196,193,116,84,202                  // vandps        %ymm10,%ymm1,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
-  .byte  196,193,33,114,209,16               // vpsrld        $0x10,%xmm9,%xmm11
-  .byte  197,233,114,211,16                  // vpsrld        $0x10,%xmm3,%xmm2
+  .byte  197,161,114,211,16                  // vpsrld        $0x10,%xmm3,%xmm11
+  .byte  196,193,105,114,209,16              // vpsrld        $0x10,%xmm9,%xmm2
   .byte  196,227,37,24,210,1                 // vinsertf128   $0x1,%xmm2,%ymm11,%ymm2
   .byte  196,193,108,84,210                  // vandps        %ymm10,%ymm2,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
-  .byte  196,193,49,114,209,24               // vpsrld        $0x18,%xmm9,%xmm9
-  .byte  197,225,114,211,24                  // vpsrld        $0x18,%xmm3,%xmm3
-  .byte  196,227,53,24,219,1                 // vinsertf128   $0x1,%xmm3,%ymm9,%ymm3
+  .byte  197,169,114,211,24                  // vpsrld        $0x18,%xmm3,%xmm10
+  .byte  196,193,97,114,209,24               // vpsrld        $0x18,%xmm9,%xmm3
+  .byte  196,227,45,24,219,1                 // vinsertf128   $0x1,%xmm3,%ymm10,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  76,137,193                          // mov           %r8,%rcx
   .byte  255,224                             // jmpq          *%rax
-  .byte  65,137,200                          // mov           %ecx,%r8d
-  .byte  65,128,224,7                        // and           $0x7,%r8b
-  .byte  196,65,52,87,201                    // vxorps        %ymm9,%ymm9,%ymm9
-  .byte  65,254,200                          // dec           %r8b
-  .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  15,135,102,255,255,255              // ja            4280 <_sk_load_8888_avx+0x14>
-  .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,139,0,0,0                 // lea           0x8b(%rip),%r9        # 43b0 <_sk_load_8888_avx+0x144>
-  .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
-  .byte  76,1,200                            // add           %r9,%rax
-  .byte  255,224                             // jmpq          *%rax
-  .byte  196,193,121,110,68,186,24           // vmovd         0x18(%r10,%rdi,4),%xmm0
-  .byte  197,249,112,192,68                  // vpshufd       $0x44,%xmm0,%xmm0
-  .byte  196,227,125,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
-  .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
-  .byte  196,99,117,12,200,64                // vblendps      $0x40,%ymm0,%ymm1,%ymm9
-  .byte  196,99,125,25,200,1                 // vextractf128  $0x1,%ymm9,%xmm0
-  .byte  196,195,121,34,68,186,20,1          // vpinsrd       $0x1,0x14(%r10,%rdi,4),%xmm0,%xmm0
-  .byte  196,99,53,24,200,1                  // vinsertf128   $0x1,%xmm0,%ymm9,%ymm9
-  .byte  196,99,125,25,200,1                 // vextractf128  $0x1,%ymm9,%xmm0
-  .byte  196,195,121,34,68,186,16,0          // vpinsrd       $0x0,0x10(%r10,%rdi,4),%xmm0,%xmm0
-  .byte  196,99,53,24,200,1                  // vinsertf128   $0x1,%xmm0,%ymm9,%ymm9
-  .byte  196,195,49,34,68,186,12,3           // vpinsrd       $0x3,0xc(%r10,%rdi,4),%xmm9,%xmm0
-  .byte  196,99,53,12,200,15                 // vblendps      $0xf,%ymm0,%ymm9,%ymm9
-  .byte  196,195,49,34,68,186,8,2            // vpinsrd       $0x2,0x8(%r10,%rdi,4),%xmm9,%xmm0
-  .byte  196,99,53,12,200,15                 // vblendps      $0xf,%ymm0,%ymm9,%ymm9
-  .byte  196,195,49,34,68,186,4,1            // vpinsrd       $0x1,0x4(%r10,%rdi,4),%xmm9,%xmm0
-  .byte  196,99,53,12,200,15                 // vblendps      $0xf,%ymm0,%ymm9,%ymm9
-  .byte  196,195,49,34,4,186,0               // vpinsrd       $0x0,(%r10,%rdi,4),%xmm9,%xmm0
-  .byte  196,99,53,12,200,15                 // vblendps      $0xf,%ymm0,%ymm9,%ymm9
-  .byte  233,210,254,255,255                 // jmpq          4280 <_sk_load_8888_avx+0x14>
-  .byte  102,144                             // xchg          %ax,%ax
-  .byte  236                                 // in            (%dx),%al
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  222,255                             // fdivrp        %st,%st(7)
-  .byte  255                                 // (bad)
-  .byte  255,208                             // callq         *%rax
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255,194                             // inc           %edx
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255,174,255,255,255,154             // ljmp          *-0x65000001(%rsi)
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  126,255                             // jle           43c9 <_sk_load_8888_avx+0x15d>
-  .byte  255                                 // (bad)
-  .byte  255                                 // .byte         0xff
+  .byte  185,8,0,0,0                         // mov           $0x8,%ecx
+  .byte  68,41,193                           // sub           %r8d,%ecx
+  .byte  192,225,3                           // shl           $0x3,%cl
+  .byte  72,199,192,255,255,255,255          // mov           $0xffffffffffffffff,%rax
+  .byte  72,211,232                          // shr           %cl,%rax
+  .byte  196,225,249,110,192                 // vmovq         %rax,%xmm0
+  .byte  196,226,121,48,192                  // vpmovzxbw     %xmm0,%xmm0
+  .byte  196,226,121,0,13,58,40,0,0          // vpshufb       0x283a(%rip),%xmm0,%xmm1        # 6a40 <_sk_callback_avx+0x5c4>
+  .byte  196,226,121,33,201                  // vpmovsxbd     %xmm1,%xmm1
+  .byte  196,226,121,0,5,60,40,0,0           // vpshufb       0x283c(%rip),%xmm0,%xmm0        # 6a50 <_sk_callback_avx+0x5d4>
+  .byte  196,226,121,33,192                  // vpmovsxbd     %xmm0,%xmm0
+  .byte  196,227,117,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
+  .byte  196,194,125,44,25                   // vmaskmovps    (%r9),%ymm0,%ymm3
+  .byte  233,49,255,255,255                  // jmpq          415a <_sk_load_8888_avx+0x1e>
 
 HIDDEN _sk_gather_8888_avx
 .globl _sk_gather_8888_avx
@@ -19265,10 +19142,10 @@
   .byte  196,131,121,34,4,152,2              // vpinsrd       $0x2,(%r8,%r11,4),%xmm0,%xmm0
   .byte  196,131,121,34,28,144,3             // vpinsrd       $0x3,(%r8,%r10,4),%xmm0,%xmm3
   .byte  196,227,61,24,195,1                 // vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  .byte  197,124,40,21,194,39,0,0            // vmovaps       0x27c2(%rip),%ymm10        # 6c40 <_sk_callback_avx+0x5ec>
+  .byte  197,124,40,21,37,40,0,0             // vmovaps       0x2825(%rip),%ymm10        # 6b00 <_sk_callback_avx+0x684>
   .byte  196,193,124,84,194                  // vandps        %ymm10,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,232,37,0,0         // vbroadcastss  0x25e8(%rip),%ymm9        # 6a78 <_sk_callback_avx+0x424>
+  .byte  196,98,125,24,13,179,37,0,0         // vbroadcastss  0x25b3(%rip),%ymm9        # 68a0 <_sk_callback_avx+0x424>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  196,193,113,114,208,8               // vpsrld        $0x8,%xmm8,%xmm1
   .byte  197,233,114,211,8                   // vpsrld        $0x8,%xmm3,%xmm2
@@ -19298,9 +19175,11 @@
 .globl _sk_store_8888_avx
 FUNCTION(_sk_store_8888_avx)
 _sk_store_8888_avx:
+  .byte  73,137,200                          // mov           %rcx,%r8
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,118,37,0,0          // vbroadcastss  0x2576(%rip),%ymm8        # 6a7c <_sk_callback_avx+0x428>
+  .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
+  .byte  76,3,8                              // add           (%rax),%r9
+  .byte  196,98,125,24,5,54,37,0,0           // vbroadcastss  0x2536(%rip),%ymm8        # 68a4 <_sk_callback_avx+0x428>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,65,116,89,208                   // vmulps        %ymm8,%ymm1,%ymm10
@@ -19324,56 +19203,26 @@
   .byte  196,67,37,24,192,1                  // vinsertf128   $0x1,%xmm8,%ymm11,%ymm8
   .byte  196,65,45,86,192                    // vorpd         %ymm8,%ymm10,%ymm8
   .byte  196,65,53,86,192                    // vorpd         %ymm8,%ymm9,%ymm8
-  .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,10                              // jne           4594 <_sk_store_8888_avx+0x9c>
-  .byte  196,65,124,17,4,186                 // vmovups       %ymm8,(%r10,%rdi,4)
+  .byte  77,133,192                          // test          %r8,%r8
+  .byte  117,12                              // jne           43fe <_sk_store_8888_avx+0xa9>
+  .byte  196,65,124,17,1                     // vmovups       %ymm8,(%r9)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  76,137,193                          // mov           %r8,%rcx
   .byte  255,224                             // jmpq          *%rax
-  .byte  65,137,200                          // mov           %ecx,%r8d
-  .byte  65,128,224,7                        // and           $0x7,%r8b
-  .byte  65,254,200                          // dec           %r8b
-  .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,236                             // ja            4590 <_sk_store_8888_avx+0x98>
-  .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,85,0,0,0                  // lea           0x55(%rip),%r9        # 4604 <_sk_store_8888_avx+0x10c>
-  .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
-  .byte  76,1,200                            // add           %r9,%rax
-  .byte  255,224                             // jmpq          *%rax
-  .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
-  .byte  196,67,121,22,76,186,24,2           // vpextrd       $0x2,%xmm9,0x18(%r10,%rdi,4)
-  .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
-  .byte  196,67,121,22,76,186,20,1           // vpextrd       $0x1,%xmm9,0x14(%r10,%rdi,4)
-  .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
-  .byte  196,65,122,17,76,186,16             // vmovss        %xmm9,0x10(%r10,%rdi,4)
-  .byte  196,67,121,22,68,186,12,3           // vpextrd       $0x3,%xmm8,0xc(%r10,%rdi,4)
-  .byte  196,67,121,22,68,186,8,2            // vpextrd       $0x2,%xmm8,0x8(%r10,%rdi,4)
-  .byte  196,67,121,22,68,186,4,1            // vpextrd       $0x1,%xmm8,0x4(%r10,%rdi,4)
-  .byte  196,65,121,126,4,186                // vmovd         %xmm8,(%r10,%rdi,4)
-  .byte  235,143                             // jmp           4590 <_sk_store_8888_avx+0x98>
-  .byte  15,31,0                             // nopl          (%rax)
-  .byte  245                                 // cmc
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  237                                 // in            (%dx),%eax
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255,229                             // jmpq          *%rbp
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  221,255                             // (bad)
-  .byte  255                                 // (bad)
-  .byte  255,208                             // callq         *%rax
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255,194                             // inc           %edx
-  .byte  255                                 // (bad)
-  .byte  255                                 // (bad)
-  .byte  255                                 // .byte         0xff
-  .byte  180,255                             // mov           $0xff,%ah
-  .byte  255                                 // (bad)
-  .byte  255                                 // .byte         0xff
+  .byte  185,8,0,0,0                         // mov           $0x8,%ecx
+  .byte  68,41,193                           // sub           %r8d,%ecx
+  .byte  192,225,3                           // shl           $0x3,%cl
+  .byte  72,199,192,255,255,255,255          // mov           $0xffffffffffffffff,%rax
+  .byte  72,211,232                          // shr           %cl,%rax
+  .byte  196,97,249,110,200                  // vmovq         %rax,%xmm9
+  .byte  196,66,121,48,201                   // vpmovzxbw     %xmm9,%xmm9
+  .byte  196,98,49,0,21,58,38,0,0            // vpshufb       0x263a(%rip),%xmm9,%xmm10        # 6a60 <_sk_callback_avx+0x5e4>
+  .byte  196,66,121,33,210                   // vpmovsxbd     %xmm10,%xmm10
+  .byte  196,98,49,0,13,60,38,0,0            // vpshufb       0x263c(%rip),%xmm9,%xmm9        # 6a70 <_sk_callback_avx+0x5f4>
+  .byte  196,66,121,33,201                   // vpmovsxbd     %xmm9,%xmm9
+  .byte  196,67,45,24,201,1                  // vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
+  .byte  196,66,53,46,1                      // vmaskmovps    %ymm8,%ymm9,(%r9)
+  .byte  235,177                             // jmp           43f7 <_sk_store_8888_avx+0xa2>
 
 HIDDEN _sk_load_f16_avx
 .globl _sk_load_f16_avx
@@ -19387,7 +19236,7 @@
   .byte  197,252,17,116,36,192               // vmovups       %ymm6,-0x40(%rsp)
   .byte  197,252,17,108,36,160               // vmovups       %ymm5,-0x60(%rsp)
   .byte  197,254,127,100,36,128              // vmovdqu       %ymm4,-0x80(%rsp)
-  .byte  15,133,141,2,0,0                    // jne           48d7 <_sk_load_f16_avx+0x2b7>
+  .byte  15,133,141,2,0,0                    // jne           46fd <_sk_load_f16_avx+0x2b7>
   .byte  197,121,16,4,248                    // vmovupd       (%rax,%rdi,8),%xmm8
   .byte  197,249,16,84,248,16                // vmovupd       0x10(%rax,%rdi,8),%xmm2
   .byte  197,249,16,76,248,32                // vmovupd       0x20(%rax,%rdi,8),%xmm1
@@ -19405,13 +19254,13 @@
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
-  .byte  196,98,125,24,37,221,35,0,0         // vbroadcastss  0x23dd(%rip),%ymm12        # 6a80 <_sk_callback_avx+0x42c>
+  .byte  196,98,125,24,37,223,35,0,0         // vbroadcastss  0x23df(%rip),%ymm12        # 68a8 <_sk_callback_avx+0x42c>
   .byte  196,193,124,84,204                  // vandps        %ymm12,%ymm0,%ymm1
   .byte  197,252,87,193                      // vxorps        %ymm1,%ymm0,%ymm0
   .byte  196,195,125,25,198,1                // vextractf128  $0x1,%ymm0,%xmm14
-  .byte  196,98,121,24,29,201,35,0,0         // vbroadcastss  0x23c9(%rip),%xmm11        # 6a84 <_sk_callback_avx+0x430>
+  .byte  196,98,121,24,29,203,35,0,0         // vbroadcastss  0x23cb(%rip),%xmm11        # 68ac <_sk_callback_avx+0x430>
   .byte  196,193,8,87,219                    // vxorps        %xmm11,%xmm14,%xmm3
-  .byte  196,98,121,24,45,191,35,0,0         // vbroadcastss  0x23bf(%rip),%xmm13        # 6a88 <_sk_callback_avx+0x434>
+  .byte  196,98,121,24,45,193,35,0,0         // vbroadcastss  0x23c1(%rip),%xmm13        # 68b0 <_sk_callback_avx+0x434>
   .byte  197,145,102,219                     // vpcmpgtd      %xmm3,%xmm13,%xmm3
   .byte  196,65,120,87,211                   // vxorps        %xmm11,%xmm0,%xmm10
   .byte  196,65,17,102,210                   // vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -19425,7 +19274,7 @@
   .byte  196,227,125,24,195,1                // vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   .byte  197,252,86,193                      // vorps         %ymm1,%ymm0,%ymm0
   .byte  196,227,125,25,193,1                // vextractf128  $0x1,%ymm0,%xmm1
-  .byte  196,226,121,24,29,117,35,0,0        // vbroadcastss  0x2375(%rip),%xmm3        # 6a8c <_sk_callback_avx+0x438>
+  .byte  196,226,121,24,29,119,35,0,0        // vbroadcastss  0x2377(%rip),%xmm3        # 68b4 <_sk_callback_avx+0x438>
   .byte  197,241,254,203                     // vpaddd        %xmm3,%xmm1,%xmm1
   .byte  197,249,254,195                     // vpaddd        %xmm3,%xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
@@ -19518,29 +19367,29 @@
   .byte  197,123,16,4,248                    // vmovsd        (%rax,%rdi,8),%xmm8
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,79                              // je            4936 <_sk_load_f16_avx+0x316>
+  .byte  116,79                              // je            475c <_sk_load_f16_avx+0x316>
   .byte  197,57,22,68,248,8                  // vmovhpd       0x8(%rax,%rdi,8),%xmm8,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,67                              // jb            4936 <_sk_load_f16_avx+0x316>
+  .byte  114,67                              // jb            475c <_sk_load_f16_avx+0x316>
   .byte  197,251,16,84,248,16                // vmovsd        0x10(%rax,%rdi,8),%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  116,68                              // je            4943 <_sk_load_f16_avx+0x323>
+  .byte  116,68                              // je            4769 <_sk_load_f16_avx+0x323>
   .byte  197,233,22,84,248,24                // vmovhpd       0x18(%rax,%rdi,8),%xmm2,%xmm2
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,56                              // jb            4943 <_sk_load_f16_avx+0x323>
+  .byte  114,56                              // jb            4769 <_sk_load_f16_avx+0x323>
   .byte  197,251,16,76,248,32                // vmovsd        0x20(%rax,%rdi,8),%xmm1
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  15,132,70,253,255,255               // je            4661 <_sk_load_f16_avx+0x41>
+  .byte  15,132,70,253,255,255               // je            4487 <_sk_load_f16_avx+0x41>
   .byte  197,241,22,76,248,40                // vmovhpd       0x28(%rax,%rdi,8),%xmm1,%xmm1
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  15,130,54,253,255,255               // jb            4661 <_sk_load_f16_avx+0x41>
+  .byte  15,130,54,253,255,255               // jb            4487 <_sk_load_f16_avx+0x41>
   .byte  197,122,126,76,248,48               // vmovq         0x30(%rax,%rdi,8),%xmm9
-  .byte  233,43,253,255,255                  // jmpq          4661 <_sk_load_f16_avx+0x41>
+  .byte  233,43,253,255,255                  // jmpq          4487 <_sk_load_f16_avx+0x41>
   .byte  197,241,87,201                      // vxorpd        %xmm1,%xmm1,%xmm1
   .byte  197,233,87,210                      // vxorpd        %xmm2,%xmm2,%xmm2
-  .byte  233,30,253,255,255                  // jmpq          4661 <_sk_load_f16_avx+0x41>
+  .byte  233,30,253,255,255                  // jmpq          4487 <_sk_load_f16_avx+0x41>
   .byte  197,241,87,201                      // vxorpd        %xmm1,%xmm1,%xmm1
-  .byte  233,21,253,255,255                  // jmpq          4661 <_sk_load_f16_avx+0x41>
+  .byte  233,21,253,255,255                  // jmpq          4487 <_sk_load_f16_avx+0x41>
 
 HIDDEN _sk_gather_f16_avx
 .globl _sk_gather_f16_avx
@@ -19604,13 +19453,13 @@
   .byte  197,249,105,210                     // vpunpckhwd    %xmm2,%xmm0,%xmm2
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,194,1                // vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
-  .byte  196,98,125,24,37,57,32,0,0          // vbroadcastss  0x2039(%rip),%ymm12        # 6a90 <_sk_callback_avx+0x43c>
+  .byte  196,98,125,24,37,59,32,0,0          // vbroadcastss  0x203b(%rip),%ymm12        # 68b8 <_sk_callback_avx+0x43c>
   .byte  196,193,124,84,212                  // vandps        %ymm12,%ymm0,%ymm2
   .byte  197,252,87,194                      // vxorps        %ymm2,%ymm0,%ymm0
   .byte  196,195,125,25,198,1                // vextractf128  $0x1,%ymm0,%xmm14
-  .byte  196,98,121,24,29,37,32,0,0          // vbroadcastss  0x2025(%rip),%xmm11        # 6a94 <_sk_callback_avx+0x440>
+  .byte  196,98,121,24,29,39,32,0,0          // vbroadcastss  0x2027(%rip),%xmm11        # 68bc <_sk_callback_avx+0x440>
   .byte  196,193,8,87,219                    // vxorps        %xmm11,%xmm14,%xmm3
-  .byte  196,98,121,24,45,27,32,0,0          // vbroadcastss  0x201b(%rip),%xmm13        # 6a98 <_sk_callback_avx+0x444>
+  .byte  196,98,121,24,45,29,32,0,0          // vbroadcastss  0x201d(%rip),%xmm13        # 68c0 <_sk_callback_avx+0x444>
   .byte  197,145,102,219                     // vpcmpgtd      %xmm3,%xmm13,%xmm3
   .byte  196,65,120,87,211                   // vxorps        %xmm11,%xmm0,%xmm10
   .byte  196,65,17,102,210                   // vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -19624,7 +19473,7 @@
   .byte  196,227,125,24,195,1                // vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   .byte  197,252,86,194                      // vorps         %ymm2,%ymm0,%ymm0
   .byte  196,227,125,25,194,1                // vextractf128  $0x1,%ymm0,%xmm2
-  .byte  196,226,121,24,29,209,31,0,0        // vbroadcastss  0x1fd1(%rip),%xmm3        # 6a9c <_sk_callback_avx+0x448>
+  .byte  196,226,121,24,29,211,31,0,0        // vbroadcastss  0x1fd3(%rip),%xmm3        # 68c4 <_sk_callback_avx+0x448>
   .byte  197,233,254,211                     // vpaddd        %xmm3,%xmm2,%xmm2
   .byte  197,249,254,195                     // vpaddd        %xmm3,%xmm0,%xmm0
   .byte  196,227,125,24,194,1                // vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
@@ -19728,12 +19577,12 @@
   .byte  197,252,17,52,36                    // vmovups       %ymm6,(%rsp)
   .byte  197,252,17,108,36,224               // vmovups       %ymm5,-0x20(%rsp)
   .byte  197,252,17,100,36,192               // vmovups       %ymm4,-0x40(%rsp)
-  .byte  196,98,125,24,13,234,29,0,0         // vbroadcastss  0x1dea(%rip),%ymm9        # 6aa0 <_sk_callback_avx+0x44c>
+  .byte  196,98,125,24,13,236,29,0,0         // vbroadcastss  0x1dec(%rip),%ymm9        # 68c8 <_sk_callback_avx+0x44c>
   .byte  196,65,124,84,209                   // vandps        %ymm9,%ymm0,%ymm10
   .byte  197,252,17,68,36,128                // vmovups       %ymm0,-0x80(%rsp)
   .byte  196,65,124,87,218                   // vxorps        %ymm10,%ymm0,%ymm11
   .byte  196,67,125,25,220,1                 // vextractf128  $0x1,%ymm11,%xmm12
-  .byte  196,98,121,24,5,207,29,0,0          // vbroadcastss  0x1dcf(%rip),%xmm8        # 6aa4 <_sk_callback_avx+0x450>
+  .byte  196,98,121,24,5,209,29,0,0          // vbroadcastss  0x1dd1(%rip),%xmm8        # 68cc <_sk_callback_avx+0x450>
   .byte  196,65,57,102,236                   // vpcmpgtd      %xmm12,%xmm8,%xmm13
   .byte  196,65,57,102,243                   // vpcmpgtd      %xmm11,%xmm8,%xmm14
   .byte  196,67,13,24,237,1                  // vinsertf128   $0x1,%xmm13,%ymm14,%ymm13
@@ -19743,7 +19592,7 @@
   .byte  196,67,13,24,242,1                  // vinsertf128   $0x1,%xmm10,%ymm14,%ymm14
   .byte  196,193,33,114,211,13               // vpsrld        $0xd,%xmm11,%xmm11
   .byte  196,193,25,114,212,13               // vpsrld        $0xd,%xmm12,%xmm12
-  .byte  196,98,125,24,21,150,29,0,0         // vbroadcastss  0x1d96(%rip),%ymm10        # 6aa8 <_sk_callback_avx+0x454>
+  .byte  196,98,125,24,21,152,29,0,0         // vbroadcastss  0x1d98(%rip),%ymm10        # 68d0 <_sk_callback_avx+0x454>
   .byte  196,65,12,86,242                    // vorps         %ymm10,%ymm14,%ymm14
   .byte  196,67,125,25,247,1                 // vextractf128  $0x1,%ymm14,%xmm15
   .byte  196,65,1,254,228                    // vpaddd        %xmm12,%xmm15,%xmm12
@@ -19825,7 +19674,7 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,66                              // jne           4ef0 <_sk_store_f16_avx+0x25e>
+  .byte  117,66                              // jne           4d16 <_sk_store_f16_avx+0x25e>
   .byte  197,120,17,28,248                   // vmovups       %xmm11,(%rax,%rdi,8)
   .byte  197,120,17,84,248,16                // vmovups       %xmm10,0x10(%rax,%rdi,8)
   .byte  197,120,17,76,248,32                // vmovups       %xmm9,0x20(%rax,%rdi,8)
@@ -19841,22 +19690,22 @@
   .byte  255,224                             // jmpq          *%rax
   .byte  197,121,214,28,248                  // vmovq         %xmm11,(%rax,%rdi,8)
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,202                             // je            4ec5 <_sk_store_f16_avx+0x233>
+  .byte  116,202                             // je            4ceb <_sk_store_f16_avx+0x233>
   .byte  197,121,23,92,248,8                 // vmovhpd       %xmm11,0x8(%rax,%rdi,8)
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,190                             // jb            4ec5 <_sk_store_f16_avx+0x233>
+  .byte  114,190                             // jb            4ceb <_sk_store_f16_avx+0x233>
   .byte  197,121,214,84,248,16               // vmovq         %xmm10,0x10(%rax,%rdi,8)
-  .byte  116,182                             // je            4ec5 <_sk_store_f16_avx+0x233>
+  .byte  116,182                             // je            4ceb <_sk_store_f16_avx+0x233>
   .byte  197,121,23,84,248,24                // vmovhpd       %xmm10,0x18(%rax,%rdi,8)
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,170                             // jb            4ec5 <_sk_store_f16_avx+0x233>
+  .byte  114,170                             // jb            4ceb <_sk_store_f16_avx+0x233>
   .byte  197,121,214,76,248,32               // vmovq         %xmm9,0x20(%rax,%rdi,8)
-  .byte  116,162                             // je            4ec5 <_sk_store_f16_avx+0x233>
+  .byte  116,162                             // je            4ceb <_sk_store_f16_avx+0x233>
   .byte  197,121,23,76,248,40                // vmovhpd       %xmm9,0x28(%rax,%rdi,8)
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,150                             // jb            4ec5 <_sk_store_f16_avx+0x233>
+  .byte  114,150                             // jb            4ceb <_sk_store_f16_avx+0x233>
   .byte  197,121,214,68,248,48               // vmovq         %xmm8,0x30(%rax,%rdi,8)
-  .byte  235,142                             // jmp           4ec5 <_sk_store_f16_avx+0x233>
+  .byte  235,142                             // jmp           4ceb <_sk_store_f16_avx+0x233>
 
 HIDDEN _sk_load_u16_be_avx
 .globl _sk_load_u16_be_avx
@@ -19866,7 +19715,7 @@
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,141,4,189,0,0,0,0                // lea           0x0(,%rdi,4),%rax
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,253,0,0,0                    // jne           504a <_sk_load_u16_be_avx+0x113>
+  .byte  15,133,253,0,0,0                    // jne           4e70 <_sk_load_u16_be_avx+0x113>
   .byte  196,65,121,16,4,64                  // vmovupd       (%r8,%rax,2),%xmm8
   .byte  196,193,121,16,84,64,16             // vmovupd       0x10(%r8,%rax,2),%xmm2
   .byte  196,193,121,16,92,64,32             // vmovupd       0x20(%r8,%rax,2),%xmm3
@@ -19888,7 +19737,7 @@
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,29,238,26,0,0         // vbroadcastss  0x1aee(%rip),%ymm11        # 6aac <_sk_callback_avx+0x458>
+  .byte  196,98,125,24,29,240,26,0,0         // vbroadcastss  0x1af0(%rip),%ymm11        # 68d4 <_sk_callback_avx+0x458>
   .byte  196,193,124,89,195                  // vmulps        %ymm11,%ymm0,%ymm0
   .byte  197,177,109,202                     // vpunpckhqdq   %xmm2,%xmm9,%xmm1
   .byte  197,233,113,241,8                   // vpsllw        $0x8,%xmm1,%xmm2
@@ -19922,29 +19771,29 @@
   .byte  196,65,123,16,4,64                  // vmovsd        (%r8,%rax,2),%xmm8
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,85                              // je            50b0 <_sk_load_u16_be_avx+0x179>
+  .byte  116,85                              // je            4ed6 <_sk_load_u16_be_avx+0x179>
   .byte  196,65,57,22,68,64,8                // vmovhpd       0x8(%r8,%rax,2),%xmm8,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,72                              // jb            50b0 <_sk_load_u16_be_avx+0x179>
+  .byte  114,72                              // jb            4ed6 <_sk_load_u16_be_avx+0x179>
   .byte  196,193,123,16,84,64,16             // vmovsd        0x10(%r8,%rax,2),%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  116,72                              // je            50bd <_sk_load_u16_be_avx+0x186>
+  .byte  116,72                              // je            4ee3 <_sk_load_u16_be_avx+0x186>
   .byte  196,193,105,22,84,64,24             // vmovhpd       0x18(%r8,%rax,2),%xmm2,%xmm2
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,59                              // jb            50bd <_sk_load_u16_be_avx+0x186>
+  .byte  114,59                              // jb            4ee3 <_sk_load_u16_be_avx+0x186>
   .byte  196,193,123,16,92,64,32             // vmovsd        0x20(%r8,%rax,2),%xmm3
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  15,132,213,254,255,255              // je            4f68 <_sk_load_u16_be_avx+0x31>
+  .byte  15,132,213,254,255,255              // je            4d8e <_sk_load_u16_be_avx+0x31>
   .byte  196,193,97,22,92,64,40              // vmovhpd       0x28(%r8,%rax,2),%xmm3,%xmm3
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  15,130,196,254,255,255              // jb            4f68 <_sk_load_u16_be_avx+0x31>
+  .byte  15,130,196,254,255,255              // jb            4d8e <_sk_load_u16_be_avx+0x31>
   .byte  196,65,122,126,76,64,48             // vmovq         0x30(%r8,%rax,2),%xmm9
-  .byte  233,184,254,255,255                 // jmpq          4f68 <_sk_load_u16_be_avx+0x31>
+  .byte  233,184,254,255,255                 // jmpq          4d8e <_sk_load_u16_be_avx+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
   .byte  197,233,87,210                      // vxorpd        %xmm2,%xmm2,%xmm2
-  .byte  233,171,254,255,255                 // jmpq          4f68 <_sk_load_u16_be_avx+0x31>
+  .byte  233,171,254,255,255                 // jmpq          4d8e <_sk_load_u16_be_avx+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
-  .byte  233,162,254,255,255                 // jmpq          4f68 <_sk_load_u16_be_avx+0x31>
+  .byte  233,162,254,255,255                 // jmpq          4d8e <_sk_load_u16_be_avx+0x31>
 
 HIDDEN _sk_load_rgb_u16_be_avx
 .globl _sk_load_rgb_u16_be_avx
@@ -19954,7 +19803,7 @@
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,141,4,127                        // lea           (%rdi,%rdi,2),%rax
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,243,0,0,0                    // jne           51cb <_sk_load_rgb_u16_be_avx+0x105>
+  .byte  15,133,243,0,0,0                    // jne           4ff1 <_sk_load_rgb_u16_be_avx+0x105>
   .byte  196,193,122,111,4,64                // vmovdqu       (%r8,%rax,2),%xmm0
   .byte  196,193,122,111,84,64,12            // vmovdqu       0xc(%r8,%rax,2),%xmm2
   .byte  196,193,122,111,76,64,24            // vmovdqu       0x18(%r8,%rax,2),%xmm1
@@ -19981,7 +19830,7 @@
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,29,78,25,0,0          // vbroadcastss  0x194e(%rip),%ymm11        # 6ab0 <_sk_callback_avx+0x45c>
+  .byte  196,98,125,24,29,80,25,0,0          // vbroadcastss  0x1950(%rip),%ymm11        # 68d8 <_sk_callback_avx+0x45c>
   .byte  196,193,124,89,195                  // vmulps        %ymm11,%ymm0,%ymm0
   .byte  197,185,109,202                     // vpunpckhqdq   %xmm2,%xmm8,%xmm1
   .byte  197,233,113,241,8                   // vpsllw        $0x8,%xmm1,%xmm2
@@ -20002,41 +19851,41 @@
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  196,193,108,89,211                  // vmulps        %ymm11,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,235,24,0,0        // vbroadcastss  0x18eb(%rip),%ymm3        # 6ab4 <_sk_callback_avx+0x460>
+  .byte  196,226,125,24,29,237,24,0,0        // vbroadcastss  0x18ed(%rip),%ymm3        # 68dc <_sk_callback_avx+0x460>
   .byte  255,224                             // jmpq          *%rax
   .byte  196,193,121,110,4,64                // vmovd         (%r8,%rax,2),%xmm0
   .byte  196,193,121,196,68,64,4,2           // vpinsrw       $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  117,5                               // jne           51e4 <_sk_load_rgb_u16_be_avx+0x11e>
-  .byte  233,40,255,255,255                  // jmpq          510c <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  117,5                               // jne           500a <_sk_load_rgb_u16_be_avx+0x11e>
+  .byte  233,40,255,255,255                  // jmpq          4f32 <_sk_load_rgb_u16_be_avx+0x46>
   .byte  196,193,121,110,76,64,6             // vmovd         0x6(%r8,%rax,2),%xmm1
   .byte  196,65,113,196,68,64,10,2           // vpinsrw       $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,26                              // jb            5213 <_sk_load_rgb_u16_be_avx+0x14d>
+  .byte  114,26                              // jb            5039 <_sk_load_rgb_u16_be_avx+0x14d>
   .byte  196,193,121,110,76,64,12            // vmovd         0xc(%r8,%rax,2),%xmm1
   .byte  196,193,113,196,84,64,16,2          // vpinsrw       $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  117,10                              // jne           5218 <_sk_load_rgb_u16_be_avx+0x152>
-  .byte  233,249,254,255,255                 // jmpq          510c <_sk_load_rgb_u16_be_avx+0x46>
-  .byte  233,244,254,255,255                 // jmpq          510c <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  117,10                              // jne           503e <_sk_load_rgb_u16_be_avx+0x152>
+  .byte  233,249,254,255,255                 // jmpq          4f32 <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  233,244,254,255,255                 // jmpq          4f32 <_sk_load_rgb_u16_be_avx+0x46>
   .byte  196,193,121,110,76,64,18            // vmovd         0x12(%r8,%rax,2),%xmm1
   .byte  196,65,113,196,76,64,22,2           // vpinsrw       $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,26                              // jb            5247 <_sk_load_rgb_u16_be_avx+0x181>
+  .byte  114,26                              // jb            506d <_sk_load_rgb_u16_be_avx+0x181>
   .byte  196,193,121,110,76,64,24            // vmovd         0x18(%r8,%rax,2),%xmm1
   .byte  196,193,113,196,76,64,28,2          // vpinsrw       $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  117,10                              // jne           524c <_sk_load_rgb_u16_be_avx+0x186>
-  .byte  233,197,254,255,255                 // jmpq          510c <_sk_load_rgb_u16_be_avx+0x46>
-  .byte  233,192,254,255,255                 // jmpq          510c <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  117,10                              // jne           5072 <_sk_load_rgb_u16_be_avx+0x186>
+  .byte  233,197,254,255,255                 // jmpq          4f32 <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  233,192,254,255,255                 // jmpq          4f32 <_sk_load_rgb_u16_be_avx+0x46>
   .byte  196,193,121,110,92,64,30            // vmovd         0x1e(%r8,%rax,2),%xmm3
   .byte  196,65,97,196,92,64,34,2            // vpinsrw       $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,20                              // jb            5275 <_sk_load_rgb_u16_be_avx+0x1af>
+  .byte  114,20                              // jb            509b <_sk_load_rgb_u16_be_avx+0x1af>
   .byte  196,193,121,110,92,64,36            // vmovd         0x24(%r8,%rax,2),%xmm3
   .byte  196,193,97,196,92,64,40,2           // vpinsrw       $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
-  .byte  233,151,254,255,255                 // jmpq          510c <_sk_load_rgb_u16_be_avx+0x46>
-  .byte  233,146,254,255,255                 // jmpq          510c <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  233,151,254,255,255                 // jmpq          4f32 <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  233,146,254,255,255                 // jmpq          4f32 <_sk_load_rgb_u16_be_avx+0x46>
 
 HIDDEN _sk_store_u16_be_avx
 .globl _sk_store_u16_be_avx
@@ -20045,7 +19894,7 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,141,4,189,0,0,0,0                // lea           0x0(,%rdi,4),%rax
-  .byte  196,98,125,24,5,40,24,0,0           // vbroadcastss  0x1828(%rip),%ymm8        # 6ab8 <_sk_callback_avx+0x464>
+  .byte  196,98,125,24,5,42,24,0,0           // vbroadcastss  0x182a(%rip),%ymm8        # 68e0 <_sk_callback_avx+0x464>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,67,125,25,202,1                 // vextractf128  $0x1,%ymm9,%xmm10
@@ -20083,7 +19932,7 @@
   .byte  196,65,17,98,200                    // vpunpckldq    %xmm8,%xmm13,%xmm9
   .byte  196,65,17,106,192                   // vpunpckhdq    %xmm8,%xmm13,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,31                              // jne           5374 <_sk_store_u16_be_avx+0xfa>
+  .byte  117,31                              // jne           519a <_sk_store_u16_be_avx+0xfa>
   .byte  196,65,120,17,28,64                 // vmovups       %xmm11,(%r8,%rax,2)
   .byte  196,65,120,17,84,64,16              // vmovups       %xmm10,0x10(%r8,%rax,2)
   .byte  196,65,120,17,76,64,32              // vmovups       %xmm9,0x20(%r8,%rax,2)
@@ -20092,22 +19941,22 @@
   .byte  255,224                             // jmpq          *%rax
   .byte  196,65,121,214,28,64                // vmovq         %xmm11,(%r8,%rax,2)
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,240                             // je            5370 <_sk_store_u16_be_avx+0xf6>
+  .byte  116,240                             // je            5196 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,23,92,64,8               // vmovhpd       %xmm11,0x8(%r8,%rax,2)
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,227                             // jb            5370 <_sk_store_u16_be_avx+0xf6>
+  .byte  114,227                             // jb            5196 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,214,84,64,16             // vmovq         %xmm10,0x10(%r8,%rax,2)
-  .byte  116,218                             // je            5370 <_sk_store_u16_be_avx+0xf6>
+  .byte  116,218                             // je            5196 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,23,84,64,24              // vmovhpd       %xmm10,0x18(%r8,%rax,2)
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,205                             // jb            5370 <_sk_store_u16_be_avx+0xf6>
+  .byte  114,205                             // jb            5196 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,214,76,64,32             // vmovq         %xmm9,0x20(%r8,%rax,2)
-  .byte  116,196                             // je            5370 <_sk_store_u16_be_avx+0xf6>
+  .byte  116,196                             // je            5196 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,23,76,64,40              // vmovhpd       %xmm9,0x28(%r8,%rax,2)
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,183                             // jb            5370 <_sk_store_u16_be_avx+0xf6>
+  .byte  114,183                             // jb            5196 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,214,68,64,48             // vmovq         %xmm8,0x30(%r8,%rax,2)
-  .byte  235,174                             // jmp           5370 <_sk_store_u16_be_avx+0xf6>
+  .byte  235,174                             // jmp           5196 <_sk_store_u16_be_avx+0xf6>
 
 HIDDEN _sk_load_f32_avx
 .globl _sk_load_f32_avx
@@ -20115,10 +19964,10 @@
 _sk_load_f32_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  119,110                             // ja            5438 <_sk_load_f32_avx+0x76>
+  .byte  119,110                             // ja            525e <_sk_load_f32_avx+0x76>
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
-  .byte  76,141,21,132,0,0,0                 // lea           0x84(%rip),%r10        # 5460 <_sk_load_f32_avx+0x9e>
+  .byte  76,141,21,134,0,0,0                 // lea           0x86(%rip),%r10        # 5288 <_sk_load_f32_avx+0xa0>
   .byte  73,99,4,138                         // movslq        (%r10,%rcx,4),%rax
   .byte  76,1,208                            // add           %r10,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -20144,19 +19993,19 @@
   .byte  196,193,101,21,216                  // vunpckhpd     %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
-  .byte  133,255                             // test          %edi,%edi
-  .byte  255                                 // (bad)
-  .byte  255,204                             // dec           %esp
+  .byte  102,144                             // xchg          %ax,%ax
+  .byte  131,255,255                         // cmp           $0xffffffff,%edi
+  .byte  255,202                             // dec           %edx
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  191,255,255,255,178                 // mov           $0xb2ffffff,%edi
+  .byte  189,255,255,255,176                 // mov           $0xb0ffffff,%ebp
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,165,255,255,255,157             // jmpq          *-0x62000001(%rbp)
+  .byte  255,163,255,255,255,155             // jmpq          *-0x64000001(%rbx)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,149,255,255,255,141             // callq         *-0x72000001(%rbp)
+  .byte  255,147,255,255,255,139             // callq         *-0x74000001(%rbx)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // .byte         0xff
@@ -20177,7 +20026,7 @@
   .byte  196,65,37,20,196                    // vunpcklpd     %ymm12,%ymm11,%ymm8
   .byte  196,65,37,21,220                    // vunpckhpd     %ymm12,%ymm11,%ymm11
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,55                              // jne           54ed <_sk_store_f32_avx+0x6d>
+  .byte  117,55                              // jne           5315 <_sk_store_f32_avx+0x6d>
   .byte  196,67,45,24,225,1                  // vinsertf128   $0x1,%xmm9,%ymm10,%ymm12
   .byte  196,67,61,24,235,1                  // vinsertf128   $0x1,%xmm11,%ymm8,%ymm13
   .byte  196,67,45,6,201,49                  // vperm2f128    $0x31,%ymm9,%ymm10,%ymm9
@@ -20190,22 +20039,22 @@
   .byte  255,224                             // jmpq          *%rax
   .byte  196,65,121,17,20,128                // vmovupd       %xmm10,(%r8,%rax,4)
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,240                             // je            54e9 <_sk_store_f32_avx+0x69>
+  .byte  116,240                             // je            5311 <_sk_store_f32_avx+0x69>
   .byte  196,65,121,17,76,128,16             // vmovupd       %xmm9,0x10(%r8,%rax,4)
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,227                             // jb            54e9 <_sk_store_f32_avx+0x69>
+  .byte  114,227                             // jb            5311 <_sk_store_f32_avx+0x69>
   .byte  196,65,121,17,68,128,32             // vmovupd       %xmm8,0x20(%r8,%rax,4)
-  .byte  116,218                             // je            54e9 <_sk_store_f32_avx+0x69>
+  .byte  116,218                             // je            5311 <_sk_store_f32_avx+0x69>
   .byte  196,65,121,17,92,128,48             // vmovupd       %xmm11,0x30(%r8,%rax,4)
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,205                             // jb            54e9 <_sk_store_f32_avx+0x69>
+  .byte  114,205                             // jb            5311 <_sk_store_f32_avx+0x69>
   .byte  196,67,125,25,84,128,64,1           // vextractf128  $0x1,%ymm10,0x40(%r8,%rax,4)
-  .byte  116,195                             // je            54e9 <_sk_store_f32_avx+0x69>
+  .byte  116,195                             // je            5311 <_sk_store_f32_avx+0x69>
   .byte  196,67,125,25,76,128,80,1           // vextractf128  $0x1,%ymm9,0x50(%r8,%rax,4)
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,181                             // jb            54e9 <_sk_store_f32_avx+0x69>
+  .byte  114,181                             // jb            5311 <_sk_store_f32_avx+0x69>
   .byte  196,67,125,25,68,128,96,1           // vextractf128  $0x1,%ymm8,0x60(%r8,%rax,4)
-  .byte  235,171                             // jmp           54e9 <_sk_store_f32_avx+0x69>
+  .byte  235,171                             // jmp           5311 <_sk_store_f32_avx+0x69>
 
 HIDDEN _sk_clamp_x_avx
 .globl _sk_clamp_x_avx
@@ -20309,7 +20158,7 @@
 _sk_clamp_x_1_avx:
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  197,188,95,192                      // vmaxps        %ymm0,%ymm8,%ymm0
-  .byte  196,98,125,24,5,90,20,0,0           // vbroadcastss  0x145a(%rip),%ymm8        # 6abc <_sk_callback_avx+0x468>
+  .byte  196,98,125,24,5,90,20,0,0           // vbroadcastss  0x145a(%rip),%ymm8        # 68e4 <_sk_callback_avx+0x468>
   .byte  196,193,124,93,192                  // vminps        %ymm8,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -20327,9 +20176,9 @@
 .globl _sk_mirror_x_1_avx
 FUNCTION(_sk_mirror_x_1_avx)
 _sk_mirror_x_1_avx:
-  .byte  196,98,125,24,5,61,20,0,0           // vbroadcastss  0x143d(%rip),%ymm8        # 6ac0 <_sk_callback_avx+0x46c>
+  .byte  196,98,125,24,5,61,20,0,0           // vbroadcastss  0x143d(%rip),%ymm8        # 68e8 <_sk_callback_avx+0x46c>
   .byte  196,193,124,88,192                  // vaddps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,13,51,20,0,0          // vbroadcastss  0x1433(%rip),%ymm9        # 6ac4 <_sk_callback_avx+0x470>
+  .byte  196,98,125,24,13,51,20,0,0          // vbroadcastss  0x1433(%rip),%ymm9        # 68ec <_sk_callback_avx+0x470>
   .byte  196,65,124,89,201                   // vmulps        %ymm9,%ymm0,%ymm9
   .byte  196,67,125,8,201,1                  // vroundps      $0x1,%ymm9,%ymm9
   .byte  196,65,52,88,201                    // vaddps        %ymm9,%ymm9,%ymm9
@@ -20345,12 +20194,12 @@
 .globl _sk_luminance_to_alpha_avx
 FUNCTION(_sk_luminance_to_alpha_avx)
 _sk_luminance_to_alpha_avx:
-  .byte  196,226,125,24,29,3,20,0,0          // vbroadcastss  0x1403(%rip),%ymm3        # 6ac8 <_sk_callback_avx+0x474>
+  .byte  196,226,125,24,29,3,20,0,0          // vbroadcastss  0x1403(%rip),%ymm3        # 68f0 <_sk_callback_avx+0x474>
   .byte  197,252,89,195                      // vmulps        %ymm3,%ymm0,%ymm0
-  .byte  196,226,125,24,29,250,19,0,0        // vbroadcastss  0x13fa(%rip),%ymm3        # 6acc <_sk_callback_avx+0x478>
+  .byte  196,226,125,24,29,250,19,0,0        // vbroadcastss  0x13fa(%rip),%ymm3        # 68f4 <_sk_callback_avx+0x478>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
   .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,237,19,0,0        // vbroadcastss  0x13ed(%rip),%ymm1        # 6ad0 <_sk_callback_avx+0x47c>
+  .byte  196,226,125,24,13,237,19,0,0        // vbroadcastss  0x13ed(%rip),%ymm1        # 68f8 <_sk_callback_avx+0x47c>
   .byte  197,236,89,201                      // vmulps        %ymm1,%ymm2,%ymm1
   .byte  197,252,88,217                      // vaddps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -20569,9 +20418,9 @@
   .byte  72,139,24                           // mov           (%rax),%rbx
   .byte  72,139,104,8                        // mov           0x8(%rax),%rbp
   .byte  72,255,203                          // dec           %rbx
-  .byte  120,7                               // js            5a39 <_sk_evenly_spaced_gradient_avx+0x1f>
+  .byte  120,7                               // js            5861 <_sk_evenly_spaced_gradient_avx+0x1f>
   .byte  196,225,242,42,203                  // vcvtsi2ss     %rbx,%xmm1,%xmm1
-  .byte  235,21                              // jmp           5a4e <_sk_evenly_spaced_gradient_avx+0x34>
+  .byte  235,21                              // jmp           5876 <_sk_evenly_spaced_gradient_avx+0x34>
   .byte  73,137,216                          // mov           %rbx,%r8
   .byte  73,209,232                          // shr           %r8
   .byte  131,227,1                           // and           $0x1,%ebx
@@ -20728,18 +20577,18 @@
 .globl _sk_gauss_a_to_rgba_avx
 FUNCTION(_sk_gauss_a_to_rgba_avx)
 _sk_gauss_a_to_rgba_avx:
-  .byte  196,226,125,24,5,88,13,0,0          // vbroadcastss  0xd58(%rip),%ymm0        # 6ad4 <_sk_callback_avx+0x480>
+  .byte  196,226,125,24,5,88,13,0,0          // vbroadcastss  0xd58(%rip),%ymm0        # 68fc <_sk_callback_avx+0x480>
   .byte  197,228,89,192                      // vmulps        %ymm0,%ymm3,%ymm0
-  .byte  196,226,125,24,13,79,13,0,0         // vbroadcastss  0xd4f(%rip),%ymm1        # 6ad8 <_sk_callback_avx+0x484>
+  .byte  196,226,125,24,13,79,13,0,0         // vbroadcastss  0xd4f(%rip),%ymm1        # 6900 <_sk_callback_avx+0x484>
   .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
   .byte  197,252,89,195                      // vmulps        %ymm3,%ymm0,%ymm0
-  .byte  196,226,125,24,13,66,13,0,0         // vbroadcastss  0xd42(%rip),%ymm1        # 6adc <_sk_callback_avx+0x488>
+  .byte  196,226,125,24,13,66,13,0,0         // vbroadcastss  0xd42(%rip),%ymm1        # 6904 <_sk_callback_avx+0x488>
   .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
   .byte  197,252,89,195                      // vmulps        %ymm3,%ymm0,%ymm0
-  .byte  196,226,125,24,13,53,13,0,0         // vbroadcastss  0xd35(%rip),%ymm1        # 6ae0 <_sk_callback_avx+0x48c>
+  .byte  196,226,125,24,13,53,13,0,0         // vbroadcastss  0xd35(%rip),%ymm1        # 6908 <_sk_callback_avx+0x48c>
   .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
   .byte  197,252,89,195                      // vmulps        %ymm3,%ymm0,%ymm0
-  .byte  196,226,125,24,13,40,13,0,0         // vbroadcastss  0xd28(%rip),%ymm1        # 6ae4 <_sk_callback_avx+0x490>
+  .byte  196,226,125,24,13,40,13,0,0         // vbroadcastss  0xd28(%rip),%ymm1        # 690c <_sk_callback_avx+0x490>
   .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
@@ -20761,12 +20610,12 @@
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
   .byte  73,131,248,2                        // cmp           $0x2,%r8
-  .byte  114,80                              // jb            5e39 <_sk_gradient_avx+0x69>
+  .byte  114,80                              // jb            5c61 <_sk_gradient_avx+0x69>
   .byte  72,139,88,72                        // mov           0x48(%rax),%rbx
   .byte  73,255,200                          // dec           %r8
   .byte  72,131,195,4                        // add           $0x4,%rbx
   .byte  196,65,52,87,201                    // vxorps        %ymm9,%ymm9,%ymm9
-  .byte  196,98,125,24,21,230,12,0,0         // vbroadcastss  0xce6(%rip),%ymm10        # 6ae8 <_sk_callback_avx+0x494>
+  .byte  196,98,125,24,21,230,12,0,0         // vbroadcastss  0xce6(%rip),%ymm10        # 6910 <_sk_callback_avx+0x494>
   .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
   .byte  196,98,125,24,3                     // vbroadcastss  (%rbx),%ymm8
   .byte  197,60,194,192,2                    // vcmpleps      %ymm0,%ymm8,%ymm8
@@ -20778,7 +20627,7 @@
   .byte  196,227,117,24,202,1                // vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   .byte  72,131,195,4                        // add           $0x4,%rbx
   .byte  73,255,200                          // dec           %r8
-  .byte  117,205                             // jne           5e06 <_sk_gradient_avx+0x36>
+  .byte  117,205                             // jne           5c2e <_sk_gradient_avx+0x36>
   .byte  196,195,249,22,200,1                // vpextrq       $0x1,%xmm1,%r8
   .byte  69,137,193                          // mov           %r8d,%r9d
   .byte  73,193,232,32                       // shr           $0x20,%r8
@@ -20960,27 +20809,27 @@
   .byte  196,65,52,95,226                    // vmaxps        %ymm10,%ymm9,%ymm12
   .byte  196,65,36,94,220                    // vdivps        %ymm12,%ymm11,%ymm11
   .byte  196,65,36,89,227                    // vmulps        %ymm11,%ymm11,%ymm12
-  .byte  196,98,125,24,45,10,9,0,0           // vbroadcastss  0x90a(%rip),%ymm13        # 6aec <_sk_callback_avx+0x498>
+  .byte  196,98,125,24,45,10,9,0,0           // vbroadcastss  0x90a(%rip),%ymm13        # 6914 <_sk_callback_avx+0x498>
   .byte  196,65,28,89,237                    // vmulps        %ymm13,%ymm12,%ymm13
-  .byte  196,98,125,24,53,0,9,0,0            // vbroadcastss  0x900(%rip),%ymm14        # 6af0 <_sk_callback_avx+0x49c>
+  .byte  196,98,125,24,53,0,9,0,0            // vbroadcastss  0x900(%rip),%ymm14        # 6918 <_sk_callback_avx+0x49c>
   .byte  196,65,20,88,238                    // vaddps        %ymm14,%ymm13,%ymm13
   .byte  196,65,28,89,237                    // vmulps        %ymm13,%ymm12,%ymm13
-  .byte  196,98,125,24,53,241,8,0,0          // vbroadcastss  0x8f1(%rip),%ymm14        # 6af4 <_sk_callback_avx+0x4a0>
+  .byte  196,98,125,24,53,241,8,0,0          // vbroadcastss  0x8f1(%rip),%ymm14        # 691c <_sk_callback_avx+0x4a0>
   .byte  196,65,20,88,238                    // vaddps        %ymm14,%ymm13,%ymm13
   .byte  196,65,28,89,229                    // vmulps        %ymm13,%ymm12,%ymm12
-  .byte  196,98,125,24,45,226,8,0,0          // vbroadcastss  0x8e2(%rip),%ymm13        # 6af8 <_sk_callback_avx+0x4a4>
+  .byte  196,98,125,24,45,226,8,0,0          // vbroadcastss  0x8e2(%rip),%ymm13        # 6920 <_sk_callback_avx+0x4a4>
   .byte  196,65,28,88,229                    // vaddps        %ymm13,%ymm12,%ymm12
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
   .byte  196,65,52,194,202,1                 // vcmpltps      %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,21,205,8,0,0          // vbroadcastss  0x8cd(%rip),%ymm10        # 6afc <_sk_callback_avx+0x4a8>
+  .byte  196,98,125,24,21,205,8,0,0          // vbroadcastss  0x8cd(%rip),%ymm10        # 6924 <_sk_callback_avx+0x4a8>
   .byte  196,65,44,92,211                    // vsubps        %ymm11,%ymm10,%ymm10
   .byte  196,67,37,74,202,144                // vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   .byte  196,193,124,194,192,1               // vcmpltps      %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,21,183,8,0,0          // vbroadcastss  0x8b7(%rip),%ymm10        # 6b00 <_sk_callback_avx+0x4ac>
+  .byte  196,98,125,24,21,183,8,0,0          // vbroadcastss  0x8b7(%rip),%ymm10        # 6928 <_sk_callback_avx+0x4ac>
   .byte  196,65,44,92,209                    // vsubps        %ymm9,%ymm10,%ymm10
   .byte  196,195,53,74,194,0                 // vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   .byte  196,65,116,194,200,1                // vcmpltps      %ymm8,%ymm1,%ymm9
-  .byte  196,98,125,24,21,161,8,0,0          // vbroadcastss  0x8a1(%rip),%ymm10        # 6b04 <_sk_callback_avx+0x4b0>
+  .byte  196,98,125,24,21,161,8,0,0          // vbroadcastss  0x8a1(%rip),%ymm10        # 692c <_sk_callback_avx+0x4b0>
   .byte  197,44,92,208                       // vsubps        %ymm0,%ymm10,%ymm10
   .byte  196,195,125,74,194,144              // vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   .byte  196,65,124,194,200,3                // vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -21004,7 +20853,7 @@
 FUNCTION(_sk_save_xy_avx)
 _sk_save_xy_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,107,8,0,0           // vbroadcastss  0x86b(%rip),%ymm8        # 6b08 <_sk_callback_avx+0x4b4>
+  .byte  196,98,125,24,5,107,8,0,0           // vbroadcastss  0x86b(%rip),%ymm8        # 6930 <_sk_callback_avx+0x4b4>
   .byte  196,65,124,88,200                   // vaddps        %ymm8,%ymm0,%ymm9
   .byte  196,67,125,8,209,1                  // vroundps      $0x1,%ymm9,%ymm10
   .byte  196,65,52,92,202                    // vsubps        %ymm10,%ymm9,%ymm9
@@ -21041,9 +20890,9 @@
 FUNCTION(_sk_bilinear_nx_avx)
 _sk_bilinear_nx_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,247,7,0,0          // vbroadcastss  0x7f7(%rip),%ymm0        # 6b0c <_sk_callback_avx+0x4b8>
+  .byte  196,226,125,24,5,247,7,0,0          // vbroadcastss  0x7f7(%rip),%ymm0        # 6934 <_sk_callback_avx+0x4b8>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,238,7,0,0           // vbroadcastss  0x7ee(%rip),%ymm8        # 6b10 <_sk_callback_avx+0x4bc>
+  .byte  196,98,125,24,5,238,7,0,0           // vbroadcastss  0x7ee(%rip),%ymm8        # 6938 <_sk_callback_avx+0x4bc>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -21054,7 +20903,7 @@
 FUNCTION(_sk_bilinear_px_avx)
 _sk_bilinear_px_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,214,7,0,0          // vbroadcastss  0x7d6(%rip),%ymm0        # 6b14 <_sk_callback_avx+0x4c0>
+  .byte  196,226,125,24,5,214,7,0,0          // vbroadcastss  0x7d6(%rip),%ymm0        # 693c <_sk_callback_avx+0x4c0>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,64,64                    // vmovups       0x40(%rax),%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -21066,9 +20915,9 @@
 FUNCTION(_sk_bilinear_ny_avx)
 _sk_bilinear_ny_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,186,7,0,0         // vbroadcastss  0x7ba(%rip),%ymm1        # 6b18 <_sk_callback_avx+0x4c4>
+  .byte  196,226,125,24,13,186,7,0,0         // vbroadcastss  0x7ba(%rip),%ymm1        # 6940 <_sk_callback_avx+0x4c4>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,176,7,0,0           // vbroadcastss  0x7b0(%rip),%ymm8        # 6b1c <_sk_callback_avx+0x4c8>
+  .byte  196,98,125,24,5,176,7,0,0           // vbroadcastss  0x7b0(%rip),%ymm8        # 6944 <_sk_callback_avx+0x4c8>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -21079,7 +20928,7 @@
 FUNCTION(_sk_bilinear_py_avx)
 _sk_bilinear_py_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,152,7,0,0         // vbroadcastss  0x798(%rip),%ymm1        # 6b20 <_sk_callback_avx+0x4cc>
+  .byte  196,226,125,24,13,152,7,0,0         // vbroadcastss  0x798(%rip),%ymm1        # 6948 <_sk_callback_avx+0x4cc>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
   .byte  197,124,16,64,96                    // vmovups       0x60(%rax),%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -21091,14 +20940,14 @@
 FUNCTION(_sk_bicubic_n3x_avx)
 _sk_bicubic_n3x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,123,7,0,0          // vbroadcastss  0x77b(%rip),%ymm0        # 6b24 <_sk_callback_avx+0x4d0>
+  .byte  196,226,125,24,5,123,7,0,0          // vbroadcastss  0x77b(%rip),%ymm0        # 694c <_sk_callback_avx+0x4d0>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,114,7,0,0           // vbroadcastss  0x772(%rip),%ymm8        # 6b28 <_sk_callback_avx+0x4d4>
+  .byte  196,98,125,24,5,114,7,0,0           // vbroadcastss  0x772(%rip),%ymm8        # 6950 <_sk_callback_avx+0x4d4>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,99,7,0,0           // vbroadcastss  0x763(%rip),%ymm10        # 6b2c <_sk_callback_avx+0x4d8>
+  .byte  196,98,125,24,21,99,7,0,0           // vbroadcastss  0x763(%rip),%ymm10        # 6954 <_sk_callback_avx+0x4d8>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,89,7,0,0           // vbroadcastss  0x759(%rip),%ymm10        # 6b30 <_sk_callback_avx+0x4dc>
+  .byte  196,98,125,24,21,89,7,0,0           // vbroadcastss  0x759(%rip),%ymm10        # 6958 <_sk_callback_avx+0x4dc>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -21110,19 +20959,19 @@
 FUNCTION(_sk_bicubic_n1x_avx)
 _sk_bicubic_n1x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,60,7,0,0           // vbroadcastss  0x73c(%rip),%ymm0        # 6b34 <_sk_callback_avx+0x4e0>
+  .byte  196,226,125,24,5,60,7,0,0           // vbroadcastss  0x73c(%rip),%ymm0        # 695c <_sk_callback_avx+0x4e0>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,51,7,0,0            // vbroadcastss  0x733(%rip),%ymm8        # 6b38 <_sk_callback_avx+0x4e4>
+  .byte  196,98,125,24,5,51,7,0,0            // vbroadcastss  0x733(%rip),%ymm8        # 6960 <_sk_callback_avx+0x4e4>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,41,7,0,0           // vbroadcastss  0x729(%rip),%ymm9        # 6b3c <_sk_callback_avx+0x4e8>
+  .byte  196,98,125,24,13,41,7,0,0           // vbroadcastss  0x729(%rip),%ymm9        # 6964 <_sk_callback_avx+0x4e8>
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,31,7,0,0           // vbroadcastss  0x71f(%rip),%ymm10        # 6b40 <_sk_callback_avx+0x4ec>
+  .byte  196,98,125,24,21,31,7,0,0           // vbroadcastss  0x71f(%rip),%ymm10        # 6968 <_sk_callback_avx+0x4ec>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,16,7,0,0           // vbroadcastss  0x710(%rip),%ymm10        # 6b44 <_sk_callback_avx+0x4f0>
+  .byte  196,98,125,24,21,16,7,0,0           // vbroadcastss  0x710(%rip),%ymm10        # 696c <_sk_callback_avx+0x4f0>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,1,7,0,0            // vbroadcastss  0x701(%rip),%ymm9        # 6b48 <_sk_callback_avx+0x4f4>
+  .byte  196,98,125,24,13,1,7,0,0            // vbroadcastss  0x701(%rip),%ymm9        # 6970 <_sk_callback_avx+0x4f4>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -21133,17 +20982,17 @@
 FUNCTION(_sk_bicubic_p1x_avx)
 _sk_bicubic_p1x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,233,6,0,0           // vbroadcastss  0x6e9(%rip),%ymm8        # 6b4c <_sk_callback_avx+0x4f8>
+  .byte  196,98,125,24,5,233,6,0,0           // vbroadcastss  0x6e9(%rip),%ymm8        # 6974 <_sk_callback_avx+0x4f8>
   .byte  197,188,88,0                        // vaddps        (%rax),%ymm8,%ymm0
   .byte  197,124,16,72,64                    // vmovups       0x40(%rax),%ymm9
-  .byte  196,98,125,24,21,219,6,0,0          // vbroadcastss  0x6db(%rip),%ymm10        # 6b50 <_sk_callback_avx+0x4fc>
+  .byte  196,98,125,24,21,219,6,0,0          // vbroadcastss  0x6db(%rip),%ymm10        # 6978 <_sk_callback_avx+0x4fc>
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
-  .byte  196,98,125,24,29,209,6,0,0          // vbroadcastss  0x6d1(%rip),%ymm11        # 6b54 <_sk_callback_avx+0x500>
+  .byte  196,98,125,24,29,209,6,0,0          // vbroadcastss  0x6d1(%rip),%ymm11        # 697c <_sk_callback_avx+0x500>
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
   .byte  196,65,44,88,192                    // vaddps        %ymm8,%ymm10,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
-  .byte  196,98,125,24,13,184,6,0,0          // vbroadcastss  0x6b8(%rip),%ymm9        # 6b58 <_sk_callback_avx+0x504>
+  .byte  196,98,125,24,13,184,6,0,0          // vbroadcastss  0x6b8(%rip),%ymm9        # 6980 <_sk_callback_avx+0x504>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -21154,13 +21003,13 @@
 FUNCTION(_sk_bicubic_p3x_avx)
 _sk_bicubic_p3x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,160,6,0,0          // vbroadcastss  0x6a0(%rip),%ymm0        # 6b5c <_sk_callback_avx+0x508>
+  .byte  196,226,125,24,5,160,6,0,0          // vbroadcastss  0x6a0(%rip),%ymm0        # 6984 <_sk_callback_avx+0x508>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,64,64                    // vmovups       0x40(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,141,6,0,0          // vbroadcastss  0x68d(%rip),%ymm10        # 6b60 <_sk_callback_avx+0x50c>
+  .byte  196,98,125,24,21,141,6,0,0          // vbroadcastss  0x68d(%rip),%ymm10        # 6988 <_sk_callback_avx+0x50c>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,131,6,0,0          // vbroadcastss  0x683(%rip),%ymm10        # 6b64 <_sk_callback_avx+0x510>
+  .byte  196,98,125,24,21,131,6,0,0          // vbroadcastss  0x683(%rip),%ymm10        # 698c <_sk_callback_avx+0x510>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -21172,14 +21021,14 @@
 FUNCTION(_sk_bicubic_n3y_avx)
 _sk_bicubic_n3y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,102,6,0,0         // vbroadcastss  0x666(%rip),%ymm1        # 6b68 <_sk_callback_avx+0x514>
+  .byte  196,226,125,24,13,102,6,0,0         // vbroadcastss  0x666(%rip),%ymm1        # 6990 <_sk_callback_avx+0x514>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,92,6,0,0            // vbroadcastss  0x65c(%rip),%ymm8        # 6b6c <_sk_callback_avx+0x518>
+  .byte  196,98,125,24,5,92,6,0,0            // vbroadcastss  0x65c(%rip),%ymm8        # 6994 <_sk_callback_avx+0x518>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,77,6,0,0           // vbroadcastss  0x64d(%rip),%ymm10        # 6b70 <_sk_callback_avx+0x51c>
+  .byte  196,98,125,24,21,77,6,0,0           // vbroadcastss  0x64d(%rip),%ymm10        # 6998 <_sk_callback_avx+0x51c>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,67,6,0,0           // vbroadcastss  0x643(%rip),%ymm10        # 6b74 <_sk_callback_avx+0x520>
+  .byte  196,98,125,24,21,67,6,0,0           // vbroadcastss  0x643(%rip),%ymm10        # 699c <_sk_callback_avx+0x520>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -21191,19 +21040,19 @@
 FUNCTION(_sk_bicubic_n1y_avx)
 _sk_bicubic_n1y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,38,6,0,0          // vbroadcastss  0x626(%rip),%ymm1        # 6b78 <_sk_callback_avx+0x524>
+  .byte  196,226,125,24,13,38,6,0,0          // vbroadcastss  0x626(%rip),%ymm1        # 69a0 <_sk_callback_avx+0x524>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,28,6,0,0            // vbroadcastss  0x61c(%rip),%ymm8        # 6b7c <_sk_callback_avx+0x528>
+  .byte  196,98,125,24,5,28,6,0,0            // vbroadcastss  0x61c(%rip),%ymm8        # 69a4 <_sk_callback_avx+0x528>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,18,6,0,0           // vbroadcastss  0x612(%rip),%ymm9        # 6b80 <_sk_callback_avx+0x52c>
+  .byte  196,98,125,24,13,18,6,0,0           // vbroadcastss  0x612(%rip),%ymm9        # 69a8 <_sk_callback_avx+0x52c>
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,8,6,0,0            // vbroadcastss  0x608(%rip),%ymm10        # 6b84 <_sk_callback_avx+0x530>
+  .byte  196,98,125,24,21,8,6,0,0            // vbroadcastss  0x608(%rip),%ymm10        # 69ac <_sk_callback_avx+0x530>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,249,5,0,0          // vbroadcastss  0x5f9(%rip),%ymm10        # 6b88 <_sk_callback_avx+0x534>
+  .byte  196,98,125,24,21,249,5,0,0          // vbroadcastss  0x5f9(%rip),%ymm10        # 69b0 <_sk_callback_avx+0x534>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,234,5,0,0          // vbroadcastss  0x5ea(%rip),%ymm9        # 6b8c <_sk_callback_avx+0x538>
+  .byte  196,98,125,24,13,234,5,0,0          // vbroadcastss  0x5ea(%rip),%ymm9        # 69b4 <_sk_callback_avx+0x538>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -21214,17 +21063,17 @@
 FUNCTION(_sk_bicubic_p1y_avx)
 _sk_bicubic_p1y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,210,5,0,0           // vbroadcastss  0x5d2(%rip),%ymm8        # 6b90 <_sk_callback_avx+0x53c>
+  .byte  196,98,125,24,5,210,5,0,0           // vbroadcastss  0x5d2(%rip),%ymm8        # 69b8 <_sk_callback_avx+0x53c>
   .byte  197,188,88,72,32                    // vaddps        0x20(%rax),%ymm8,%ymm1
   .byte  197,124,16,72,96                    // vmovups       0x60(%rax),%ymm9
-  .byte  196,98,125,24,21,195,5,0,0          // vbroadcastss  0x5c3(%rip),%ymm10        # 6b94 <_sk_callback_avx+0x540>
+  .byte  196,98,125,24,21,195,5,0,0          // vbroadcastss  0x5c3(%rip),%ymm10        # 69bc <_sk_callback_avx+0x540>
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
-  .byte  196,98,125,24,29,185,5,0,0          // vbroadcastss  0x5b9(%rip),%ymm11        # 6b98 <_sk_callback_avx+0x544>
+  .byte  196,98,125,24,29,185,5,0,0          // vbroadcastss  0x5b9(%rip),%ymm11        # 69c0 <_sk_callback_avx+0x544>
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
   .byte  196,65,44,88,192                    // vaddps        %ymm8,%ymm10,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
-  .byte  196,98,125,24,13,160,5,0,0          // vbroadcastss  0x5a0(%rip),%ymm9        # 6b9c <_sk_callback_avx+0x548>
+  .byte  196,98,125,24,13,160,5,0,0          // vbroadcastss  0x5a0(%rip),%ymm9        # 69c4 <_sk_callback_avx+0x548>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -21235,13 +21084,13 @@
 FUNCTION(_sk_bicubic_p3y_avx)
 _sk_bicubic_p3y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,136,5,0,0         // vbroadcastss  0x588(%rip),%ymm1        # 6ba0 <_sk_callback_avx+0x54c>
+  .byte  196,226,125,24,13,136,5,0,0         // vbroadcastss  0x588(%rip),%ymm1        # 69c8 <_sk_callback_avx+0x54c>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
   .byte  197,124,16,64,96                    // vmovups       0x60(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,116,5,0,0          // vbroadcastss  0x574(%rip),%ymm10        # 6ba4 <_sk_callback_avx+0x550>
+  .byte  196,98,125,24,21,116,5,0,0          // vbroadcastss  0x574(%rip),%ymm10        # 69cc <_sk_callback_avx+0x550>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,106,5,0,0          // vbroadcastss  0x56a(%rip),%ymm10        # 6ba8 <_sk_callback_avx+0x554>
+  .byte  196,98,125,24,21,106,5,0,0          // vbroadcastss  0x56a(%rip),%ymm10        # 69d0 <_sk_callback_avx+0x554>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -21365,25 +21214,25 @@
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 682d <.literal4+0xb1>
+  .byte  71,225,61                           // rex.RXB       loope 6655 <.literal4+0xb1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,154                          // cmpb          $0x9a,(%rdi)
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 683d <.literal4+0xc1>
+  .byte  71,225,61                           // rex.RXB       loope 6665 <.literal4+0xc1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,154                          // cmpb          $0x9a,(%rdi)
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 684d <.literal4+0xd1>
+  .byte  71,225,61                           // rex.RXB       loope 6675 <.literal4+0xd1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,154                          // cmpb          $0x9a,(%rdi)
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 685d <.literal4+0xe1>
+  .byte  71,225,61                           // rex.RXB       loope 6685 <.literal4+0xe1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,127                    // add           %al,0x7f00003f(%rax)
@@ -21435,7 +21284,7 @@
   .byte  190,129,128,128,59                  // mov           $0x3b808081,%esi
   .byte  129,128,128,59,0,248,0,0,8,33       // addl          $0x21080000,-0x7ffc480(%rax)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        68b1 <.literal4+0x135>
+  .byte  224,7                               // loopne        66d9 <.literal4+0x135>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -21451,10 +21300,10 @@
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
   .byte  0,52,255                            // add           %dh,(%rdi,%rdi,8)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            68d8 <.literal4+0x15c>
+  .byte  127,0                               // jg            6700 <.literal4+0x15c>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            6951 <.literal4+0x1d5>
+  .byte  119,115                             // ja            6779 <.literal4+0x1d5>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -21468,10 +21317,10 @@
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            690c <.literal4+0x190>
+  .byte  127,0                               // jg            6734 <.literal4+0x190>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            6985 <.literal4+0x209>
+  .byte  119,115                             // ja            67ad <.literal4+0x209>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -21485,10 +21334,10 @@
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            6940 <.literal4+0x1c4>
+  .byte  127,0                               // jg            6768 <.literal4+0x1c4>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            69b9 <.literal4+0x23d>
+  .byte  119,115                             // ja            67e1 <.literal4+0x23d>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -21502,10 +21351,10 @@
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            6974 <.literal4+0x1f8>
+  .byte  127,0                               // jg            679c <.literal4+0x1f8>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            69ed <.literal4+0x271>
+  .byte  119,115                             // ja            6815 <.literal4+0x271>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -21518,7 +21367,7 @@
   .byte  0,75,0                              // add           %cl,0x0(%rbx)
   .byte  0,128,63,0,0,200                    // add           %al,-0x37ffffc1(%rax)
   .byte  66,0,0                              // rex.X         add %al,(%rax)
-  .byte  127,67                              // jg            69eb <.literal4+0x26f>
+  .byte  127,67                              // jg            6813 <.literal4+0x26f>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,195                               // add           %al,%bl
   .byte  0,0                                 // add           %al,(%rax)
@@ -21530,10 +21379,10 @@
   .byte  190,80,128,3,62                     // mov           $0x3e038050,%esi
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           6a0b <.literal4+0x28f>
+  .byte  118,63                              // jbe           6833 <.literal4+0x28f>
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
-  .byte  127,67                              // jg            6a1f <.literal4+0x2a3>
+  .byte  127,67                              // jg            6847 <.literal4+0x2a3>
   .byte  129,128,128,59,0,0,128,63,129,128   // addl          $0x80813f80,0x3b80(%rax)
   .byte  128,59,0                            // cmpb          $0x0,(%rbx)
   .byte  0,128,63,129,128,128                // add           %al,-0x7f7f7ec1(%rax)
@@ -21542,7 +21391,7 @@
   .byte  0,0                                 // add           %al,(%rax)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        6a01 <.literal4+0x285>
+  .byte  224,7                               // loopne        6829 <.literal4+0x285>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -21554,7 +21403,7 @@
   .byte  0,0                                 // add           %al,(%rax)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        6a1d <.literal4+0x2a1>
+  .byte  224,7                               // loopne        6845 <.literal4+0x2a1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -21565,7 +21414,7 @@
   .byte  0,0                                 // add           %al,(%rax)
   .byte  248                                 // clc
   .byte  65,0,0                              // add           %al,(%r8)
-  .byte  124,66                              // jl            6a72 <.literal4+0x2f6>
+  .byte  124,66                              // jl            689a <.literal4+0x2f6>
   .byte  0,240                               // add           %dh,%al
   .byte  0,0                                 // add           %al,(%rax)
   .byte  137,136,136,55,0,15                 // mov           %ecx,0xf003788(%rax)
@@ -21583,9 +21432,9 @@
   .byte  137,136,136,59,15,0                 // mov           %ecx,0xf3b88(%rax)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  137,136,136,61,0,0                  // mov           %ecx,0x3d88(%rax)
-  .byte  112,65                              // jo            6ab5 <.literal4+0x339>
+  .byte  112,65                              // jo            68dd <.literal4+0x339>
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
-  .byte  127,67                              // jg            6ac3 <.literal4+0x347>
+  .byte  127,67                              // jg            68eb <.literal4+0x347>
   .byte  0,128,0,0,0,0                       // add           %al,0x0(%rax)
   .byte  0,128,0,4,0,128                     // add           %al,-0x7ffffc00(%rax)
   .byte  0,0                                 // add           %al,(%rax)
@@ -21601,7 +21450,7 @@
   .byte  0,128,55,0,0,128                    // add           %al,-0x7fffffc9(%rax)
   .byte  63                                  // (bad)
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            6b03 <.literal4+0x387>
+  .byte  127,71                              // jg            692b <.literal4+0x387>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,191,0,0,0                     // add           %al,0xbf(%rax)
@@ -21697,6 +21546,88 @@
   .byte  170                                 // stos          %al,%es:(%rdi)
   .byte  190                                 // .byte         0xbe
 
+BALIGN16
+  .byte  0,2                                 // add           %al,(%rdx)
+  .byte  4,6                                 // add           $0x6,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  8,10                                // or            %cl,(%rdx)
+  .byte  12,14                               // or            $0xe,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,2                                 // add           %al,(%rdx)
+  .byte  4,6                                 // add           $0x6,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  8,10                                // or            %cl,(%rdx)
+  .byte  12,14                               // or            $0xe,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  0,2                                 // add           %al,(%rdx)
+  .byte  4,6                                 // add           $0x6,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  8,10                                // or            %cl,(%rdx)
+  .byte  12,14                               // or            $0xe,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,2                                 // add           %al,(%rdx)
+  .byte  4,6                                 // add           $0x6,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  8,10                                // or            %cl,(%rdx)
+  .byte  12,14                               // or            $0xe,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,0                                 // add           %al,(%rax)
+
 BALIGN32
   .byte  255,0                               // incl          (%rax)
   .byte  0,0                                 // add           %al,(%rax)
@@ -21778,24 +21709,6 @@
   .byte  0,0                                 // add           %al,(%rax)
   .byte  255,0                               // incl          (%rax)
   .byte  0,0                                 // add           %al,(%rax)
-
-BALIGN16
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
 BALIGN32
 
 HIDDEN _sk_start_pipeline_sse41
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index 2baf791..0543822 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -1177,8 +1177,8 @@
   DB  76,3,8                              ; add           (%rax),%r9
   DB  77,133,192                          ; test          %r8,%r8
   DB  15,133,180,0,0,0                    ; jne           13ef <_sk_srcover_rgba_8888_hsw+0xcd>
-  DB  196,193,126,111,57                  ; vmovdqu       (%r9),%ymm7
-  DB  197,197,219,37,88,59,0,0            ; vpand         0x3b58(%rip),%ymm7,%ymm4        # 4ea0 <_sk_callback_hsw+0x526>
+  DB  196,193,124,16,57                   ; vmovups       (%r9),%ymm7
+  DB  197,196,84,37,88,59,0,0             ; vandps        0x3b58(%rip),%ymm7,%ymm4        # 4ea0 <_sk_callback_hsw+0x526>
   DB  197,252,91,228                      ; vcvtdq2ps     %ymm4,%ymm4
   DB  196,226,69,0,45,107,59,0,0          ; vpshufb       0x3b6b(%rip),%ymm7,%ymm5        # 4ec0 <_sk_callback_hsw+0x546>
   DB  197,252,91,237                      ; vcvtdq2ps     %ymm5,%ymm5
@@ -1209,7 +1209,7 @@
   DB  196,65,61,235,193                   ; vpor          %ymm9,%ymm8,%ymm8
   DB  77,133,192                          ; test          %r8,%r8
   DB  117,53                              ; jne           1418 <_sk_srcover_rgba_8888_hsw+0xf6>
-  DB  196,65,126,127,1                    ; vmovdqu       %ymm8,(%r9)
+  DB  196,65,124,17,1                     ; vmovups       %ymm8,(%r9)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,137,193                          ; mov           %r8,%rcx
   DB  255,224                             ; jmpq          *%rax
@@ -1220,7 +1220,7 @@
   DB  72,211,232                          ; shr           %cl,%rax
   DB  196,225,249,110,224                 ; vmovq         %rax,%xmm4
   DB  196,226,125,33,228                  ; vpmovsxbd     %xmm4,%ymm4
-  DB  196,194,93,140,57                   ; vpmaskmovd    (%r9),%ymm4,%ymm7
+  DB  196,194,93,44,57                    ; vmaskmovps    (%r9),%ymm4,%ymm7
   DB  233,40,255,255,255                  ; jmpq          1340 <_sk_srcover_rgba_8888_hsw+0x1e>
   DB  185,8,0,0,0                         ; mov           $0x8,%ecx
   DB  68,41,193                           ; sub           %r8d,%ecx
@@ -1229,7 +1229,7 @@
   DB  72,211,232                          ; shr           %cl,%rax
   DB  196,97,249,110,200                  ; vmovq         %rax,%xmm9
   DB  196,66,125,33,201                   ; vpmovsxbd     %xmm9,%ymm9
-  DB  196,66,53,142,1                     ; vpmaskmovd    %ymm8,%ymm9,(%r9)
+  DB  196,66,53,46,1                      ; vmaskmovps    %ymm8,%ymm9,(%r9)
   DB  235,170                             ; jmp           13e8 <_sk_srcover_rgba_8888_hsw+0xc6>
 
 PUBLIC _sk_clamp_0_hsw
@@ -1707,8 +1707,8 @@
   DB  76,3,8                              ; add           (%rax),%r9
   DB  77,133,192                          ; test          %r8,%r8
   DB  117,105                             ; jne           1c26 <_sk_load_tables_hsw+0x7e>
-  DB  196,193,126,111,25                  ; vmovdqu       (%r9),%ymm3
-  DB  197,229,219,13,54,51,0,0            ; vpand         0x3336(%rip),%ymm3,%ymm1        # 4f00 <_sk_callback_hsw+0x586>
+  DB  196,193,124,16,25                   ; vmovups       (%r9),%ymm3
+  DB  197,228,84,13,54,51,0,0             ; vandps        0x3336(%rip),%ymm3,%ymm1        # 4f00 <_sk_callback_hsw+0x586>
   DB  196,65,61,118,192                   ; vpcmpeqd      %ymm8,%ymm8,%ymm8
   DB  72,139,72,8                         ; mov           0x8(%rax),%rcx
   DB  76,139,72,16                        ; mov           0x10(%rax),%r9
@@ -1734,7 +1734,7 @@
   DB  73,211,234                          ; shr           %cl,%r10
   DB  196,193,249,110,194                 ; vmovq         %r10,%xmm0
   DB  196,226,125,33,192                  ; vpmovsxbd     %xmm0,%ymm0
-  DB  196,194,125,140,25                  ; vpmaskmovd    (%r9),%ymm0,%ymm3
+  DB  196,194,125,44,25                   ; vmaskmovps    (%r9),%ymm0,%ymm3
   DB  233,115,255,255,255                 ; jmpq          1bc2 <_sk_load_tables_hsw+0x1a>
 
 PUBLIC _sk_load_tables_u16_be_hsw
@@ -3207,8 +3207,8 @@
   DB  76,3,8                              ; add           (%rax),%r9
   DB  77,133,192                          ; test          %r8,%r8
   DB  117,88                              ; jne           3549 <_sk_load_8888_hsw+0x6d>
-  DB  196,193,126,111,25                  ; vmovdqu       (%r9),%ymm3
-  DB  197,229,219,5,194,26,0,0            ; vpand         0x1ac2(%rip),%ymm3,%ymm0        # 4fc0 <_sk_callback_hsw+0x646>
+  DB  196,193,124,16,25                   ; vmovups       (%r9),%ymm3
+  DB  197,228,84,5,194,26,0,0             ; vandps        0x1ac2(%rip),%ymm3,%ymm0        # 4fc0 <_sk_callback_hsw+0x646>
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
   DB  196,98,125,24,5,133,24,0,0          ; vbroadcastss  0x1885(%rip),%ymm8        # 4d90 <_sk_callback_hsw+0x416>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
@@ -3231,7 +3231,7 @@
   DB  72,211,232                          ; shr           %cl,%rax
   DB  196,225,249,110,192                 ; vmovq         %rax,%xmm0
   DB  196,226,125,33,192                  ; vpmovsxbd     %xmm0,%ymm0
-  DB  196,194,125,140,25                  ; vpmaskmovd    (%r9),%ymm0,%ymm3
+  DB  196,194,125,44,25                   ; vmaskmovps    (%r9),%ymm0,%ymm3
   DB  235,135                             ; jmp           34f6 <_sk_load_8888_hsw+0x1a>
 
 PUBLIC _sk_gather_8888_hsw
@@ -3284,7 +3284,7 @@
   DB  196,65,53,235,192                   ; vpor          %ymm8,%ymm9,%ymm8
   DB  77,133,192                          ; test          %r8,%r8
   DB  117,12                              ; jne           3658 <_sk_store_8888_hsw+0x73>
-  DB  196,65,126,127,1                    ; vmovdqu       %ymm8,(%r9)
+  DB  196,65,124,17,1                     ; vmovups       %ymm8,(%r9)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,137,193                          ; mov           %r8,%rcx
   DB  255,224                             ; jmpq          *%rax
@@ -3295,7 +3295,7 @@
   DB  72,211,232                          ; shr           %cl,%rax
   DB  196,97,249,110,200                  ; vmovq         %rax,%xmm9
   DB  196,66,125,33,201                   ; vpmovsxbd     %xmm9,%ymm9
-  DB  196,66,53,142,1                     ; vpmaskmovd    %ymm8,%ymm9,(%r9)
+  DB  196,66,53,46,1                      ; vmaskmovps    %ymm8,%ymm9,(%r9)
   DB  235,211                             ; jmp           3651 <_sk_store_8888_hsw+0x6c>
 
 PUBLIC _sk_load_f16_hsw
@@ -5245,14 +5245,14 @@
   DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
   DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,231,102,0,0       ; vbroadcastss  0x66e7(%rip),%ymm1        # 6844 <_sk_callback_avx+0x11c>
+  DB  196,226,125,24,13,31,101,0,0        ; vbroadcastss  0x651f(%rip),%ymm1        # 667c <_sk_callback_avx+0x11c>
   DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
   DB  197,252,88,2                        ; vaddps        (%rdx),%ymm0,%ymm0
   DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,21,203,102,0,0       ; vbroadcastss  0x66cb(%rip),%ymm2        # 6848 <_sk_callback_avx+0x120>
+  DB  196,226,125,24,21,3,101,0,0         ; vbroadcastss  0x6503(%rip),%ymm2        # 6680 <_sk_callback_avx+0x120>
   DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
   DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
   DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
@@ -5273,7 +5273,7 @@
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  196,66,125,24,8                     ; vbroadcastss  (%r8),%ymm9
   DB  196,65,60,87,209                    ; vxorps        %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,29,124,102,0,0        ; vbroadcastss  0x667c(%rip),%ymm11        # 684c <_sk_callback_avx+0x124>
+  DB  196,98,125,24,29,180,100,0,0        ; vbroadcastss  0x64b4(%rip),%ymm11        # 6684 <_sk_callback_avx+0x124>
   DB  196,65,44,84,203                    ; vandps        %ymm11,%ymm10,%ymm9
   DB  196,193,25,114,241,5                ; vpslld        $0x5,%xmm9,%xmm12
   DB  196,67,125,25,201,1                 ; vextractf128  $0x1,%ymm9,%xmm9
@@ -5284,8 +5284,8 @@
   DB  196,67,125,25,219,1                 ; vextractf128  $0x1,%ymm11,%xmm11
   DB  196,193,33,114,243,4                ; vpslld        $0x4,%xmm11,%xmm11
   DB  196,67,29,24,219,1                  ; vinsertf128   $0x1,%xmm11,%ymm12,%ymm11
-  DB  196,98,125,24,37,61,102,0,0         ; vbroadcastss  0x663d(%rip),%ymm12        # 6850 <_sk_callback_avx+0x128>
-  DB  196,98,125,24,45,56,102,0,0         ; vbroadcastss  0x6638(%rip),%ymm13        # 6854 <_sk_callback_avx+0x12c>
+  DB  196,98,125,24,37,117,100,0,0        ; vbroadcastss  0x6475(%rip),%ymm12        # 6688 <_sk_callback_avx+0x128>
+  DB  196,98,125,24,45,112,100,0,0        ; vbroadcastss  0x6470(%rip),%ymm13        # 668c <_sk_callback_avx+0x12c>
   DB  196,65,44,84,245                    ; vandps        %ymm13,%ymm10,%ymm14
   DB  196,193,1,114,246,2                 ; vpslld        $0x2,%xmm14,%xmm15
   DB  196,67,125,25,246,1                 ; vextractf128  $0x1,%ymm14,%xmm14
@@ -5312,9 +5312,9 @@
   DB  196,65,12,86,202                    ; vorps         %ymm10,%ymm14,%ymm9
   DB  196,65,60,86,193                    ; vorps         %ymm9,%ymm8,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,163,101,0,0        ; vbroadcastss  0x65a3(%rip),%ymm9        # 6858 <_sk_callback_avx+0x130>
+  DB  196,98,125,24,13,219,99,0,0         ; vbroadcastss  0x63db(%rip),%ymm9        # 6690 <_sk_callback_avx+0x130>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,153,101,0,0        ; vbroadcastss  0x6599(%rip),%ymm9        # 685c <_sk_callback_avx+0x134>
+  DB  196,98,125,24,13,209,99,0,0         ; vbroadcastss  0x63d1(%rip),%ymm9        # 6694 <_sk_callback_avx+0x134>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  196,98,125,24,72,8                  ; vbroadcastss  0x8(%rax),%ymm9
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
@@ -5373,7 +5373,7 @@
 PUBLIC _sk_srcatop_avx
 _sk_srcatop_avx LABEL PROC
   DB  197,252,89,199                      ; vmulps        %ymm7,%ymm0,%ymm0
-  DB  196,98,125,24,5,240,100,0,0         ; vbroadcastss  0x64f0(%rip),%ymm8        # 6860 <_sk_callback_avx+0x138>
+  DB  196,98,125,24,5,40,99,0,0           ; vbroadcastss  0x6328(%rip),%ymm8        # 6698 <_sk_callback_avx+0x138>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,204                       ; vmulps        %ymm4,%ymm8,%ymm9
   DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
@@ -5392,7 +5392,7 @@
 PUBLIC _sk_dstatop_avx
 _sk_dstatop_avx LABEL PROC
   DB  197,100,89,196                      ; vmulps        %ymm4,%ymm3,%ymm8
-  DB  196,98,125,24,13,178,100,0,0        ; vbroadcastss  0x64b2(%rip),%ymm9        # 6864 <_sk_callback_avx+0x13c>
+  DB  196,98,125,24,13,234,98,0,0         ; vbroadcastss  0x62ea(%rip),%ymm9        # 669c <_sk_callback_avx+0x13c>
   DB  197,52,92,207                       ; vsubps        %ymm7,%ymm9,%ymm9
   DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
   DB  197,188,88,192                      ; vaddps        %ymm0,%ymm8,%ymm0
@@ -5428,7 +5428,7 @@
 
 PUBLIC _sk_srcout_avx
 _sk_srcout_avx LABEL PROC
-  DB  196,98,125,24,5,81,100,0,0          ; vbroadcastss  0x6451(%rip),%ymm8        # 6868 <_sk_callback_avx+0x140>
+  DB  196,98,125,24,5,137,98,0,0          ; vbroadcastss  0x6289(%rip),%ymm8        # 66a0 <_sk_callback_avx+0x140>
   DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
@@ -5439,7 +5439,7 @@
 
 PUBLIC _sk_dstout_avx
 _sk_dstout_avx LABEL PROC
-  DB  196,226,125,24,5,52,100,0,0         ; vbroadcastss  0x6434(%rip),%ymm0        # 686c <_sk_callback_avx+0x144>
+  DB  196,226,125,24,5,108,98,0,0         ; vbroadcastss  0x626c(%rip),%ymm0        # 66a4 <_sk_callback_avx+0x144>
   DB  197,252,92,219                      ; vsubps        %ymm3,%ymm0,%ymm3
   DB  197,228,89,196                      ; vmulps        %ymm4,%ymm3,%ymm0
   DB  197,228,89,205                      ; vmulps        %ymm5,%ymm3,%ymm1
@@ -5450,7 +5450,7 @@
 
 PUBLIC _sk_srcover_avx
 _sk_srcover_avx LABEL PROC
-  DB  196,98,125,24,5,23,100,0,0          ; vbroadcastss  0x6417(%rip),%ymm8        # 6870 <_sk_callback_avx+0x148>
+  DB  196,98,125,24,5,79,98,0,0           ; vbroadcastss  0x624f(%rip),%ymm8        # 66a8 <_sk_callback_avx+0x148>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,204                       ; vmulps        %ymm4,%ymm8,%ymm9
   DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
@@ -5465,7 +5465,7 @@
 
 PUBLIC _sk_dstover_avx
 _sk_dstover_avx LABEL PROC
-  DB  196,98,125,24,5,234,99,0,0          ; vbroadcastss  0x63ea(%rip),%ymm8        # 6874 <_sk_callback_avx+0x14c>
+  DB  196,98,125,24,5,34,98,0,0           ; vbroadcastss  0x6222(%rip),%ymm8        # 66ac <_sk_callback_avx+0x14c>
   DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
@@ -5489,7 +5489,7 @@
 
 PUBLIC _sk_multiply_avx
 _sk_multiply_avx LABEL PROC
-  DB  196,98,125,24,5,169,99,0,0          ; vbroadcastss  0x63a9(%rip),%ymm8        # 6878 <_sk_callback_avx+0x150>
+  DB  196,98,125,24,5,225,97,0,0          ; vbroadcastss  0x61e1(%rip),%ymm8        # 66b0 <_sk_callback_avx+0x150>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,52,89,208                       ; vmulps        %ymm0,%ymm9,%ymm10
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5543,7 +5543,7 @@
 
 PUBLIC _sk_xor__avx
 _sk_xor__avx LABEL PROC
-  DB  196,98,125,24,5,248,98,0,0          ; vbroadcastss  0x62f8(%rip),%ymm8        # 687c <_sk_callback_avx+0x154>
+  DB  196,98,125,24,5,48,97,0,0           ; vbroadcastss  0x6130(%rip),%ymm8        # 66b4 <_sk_callback_avx+0x154>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5578,7 +5578,7 @@
   DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
   DB  196,193,108,95,209                  ; vmaxps        %ymm9,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,120,98,0,0          ; vbroadcastss  0x6278(%rip),%ymm8        # 6880 <_sk_callback_avx+0x158>
+  DB  196,98,125,24,5,176,96,0,0          ; vbroadcastss  0x60b0(%rip),%ymm8        # 66b8 <_sk_callback_avx+0x158>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -5602,7 +5602,7 @@
   DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
   DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,36,98,0,0           ; vbroadcastss  0x6224(%rip),%ymm8        # 6884 <_sk_callback_avx+0x15c>
+  DB  196,98,125,24,5,92,96,0,0           ; vbroadcastss  0x605c(%rip),%ymm8        # 66bc <_sk_callback_avx+0x15c>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -5629,7 +5629,7 @@
   DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
   DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,196,97,0,0          ; vbroadcastss  0x61c4(%rip),%ymm8        # 6888 <_sk_callback_avx+0x160>
+  DB  196,98,125,24,5,252,95,0,0          ; vbroadcastss  0x5ffc(%rip),%ymm8        # 66c0 <_sk_callback_avx+0x160>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -5650,7 +5650,7 @@
   DB  197,236,89,214                      ; vmulps        %ymm6,%ymm2,%ymm2
   DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,127,97,0,0          ; vbroadcastss  0x617f(%rip),%ymm8        # 688c <_sk_callback_avx+0x164>
+  DB  196,98,125,24,5,183,95,0,0          ; vbroadcastss  0x5fb7(%rip),%ymm8        # 66c4 <_sk_callback_avx+0x164>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -5659,7 +5659,7 @@
 
 PUBLIC _sk_colorburn_avx
 _sk_colorburn_avx LABEL PROC
-  DB  196,98,125,24,5,106,97,0,0          ; vbroadcastss  0x616a(%rip),%ymm8        # 6890 <_sk_callback_avx+0x168>
+  DB  196,98,125,24,5,162,95,0,0          ; vbroadcastss  0x5fa2(%rip),%ymm8        # 66c8 <_sk_callback_avx+0x168>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,52,89,216                       ; vmulps        %ymm0,%ymm9,%ymm11
   DB  196,65,44,87,210                    ; vxorps        %ymm10,%ymm10,%ymm10
@@ -5719,7 +5719,7 @@
 PUBLIC _sk_colordodge_avx
 _sk_colordodge_avx LABEL PROC
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
-  DB  196,98,125,24,13,102,96,0,0         ; vbroadcastss  0x6066(%rip),%ymm9        # 6894 <_sk_callback_avx+0x16c>
+  DB  196,98,125,24,13,158,94,0,0         ; vbroadcastss  0x5e9e(%rip),%ymm9        # 66cc <_sk_callback_avx+0x16c>
   DB  197,52,92,215                       ; vsubps        %ymm7,%ymm9,%ymm10
   DB  197,44,89,216                       ; vmulps        %ymm0,%ymm10,%ymm11
   DB  197,52,92,203                       ; vsubps        %ymm3,%ymm9,%ymm9
@@ -5774,7 +5774,7 @@
 
 PUBLIC _sk_hardlight_avx
 _sk_hardlight_avx LABEL PROC
-  DB  196,98,125,24,5,120,95,0,0          ; vbroadcastss  0x5f78(%rip),%ymm8        # 6898 <_sk_callback_avx+0x170>
+  DB  196,98,125,24,5,176,93,0,0          ; vbroadcastss  0x5db0(%rip),%ymm8        # 66d0 <_sk_callback_avx+0x170>
   DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
   DB  197,44,89,200                       ; vmulps        %ymm0,%ymm10,%ymm9
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5827,7 +5827,7 @@
 
 PUBLIC _sk_overlay_avx
 _sk_overlay_avx LABEL PROC
-  DB  196,98,125,24,5,161,94,0,0          ; vbroadcastss  0x5ea1(%rip),%ymm8        # 689c <_sk_callback_avx+0x174>
+  DB  196,98,125,24,5,217,92,0,0          ; vbroadcastss  0x5cd9(%rip),%ymm8        # 66d4 <_sk_callback_avx+0x174>
   DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
   DB  197,44,89,200                       ; vmulps        %ymm0,%ymm10,%ymm9
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5892,10 +5892,10 @@
   DB  196,65,60,88,192                    ; vaddps        %ymm8,%ymm8,%ymm8
   DB  196,65,60,89,216                    ; vmulps        %ymm8,%ymm8,%ymm11
   DB  196,65,60,88,195                    ; vaddps        %ymm11,%ymm8,%ymm8
-  DB  196,98,125,24,29,148,93,0,0         ; vbroadcastss  0x5d94(%rip),%ymm11        # 68a4 <_sk_callback_avx+0x17c>
+  DB  196,98,125,24,29,204,91,0,0         ; vbroadcastss  0x5bcc(%rip),%ymm11        # 66dc <_sk_callback_avx+0x17c>
   DB  196,65,28,88,235                    ; vaddps        %ymm11,%ymm12,%ymm13
   DB  196,65,20,89,192                    ; vmulps        %ymm8,%ymm13,%ymm8
-  DB  196,98,125,24,45,133,93,0,0         ; vbroadcastss  0x5d85(%rip),%ymm13        # 68a8 <_sk_callback_avx+0x180>
+  DB  196,98,125,24,45,189,91,0,0         ; vbroadcastss  0x5bbd(%rip),%ymm13        # 66e0 <_sk_callback_avx+0x180>
   DB  196,65,28,89,245                    ; vmulps        %ymm13,%ymm12,%ymm14
   DB  196,65,12,88,192                    ; vaddps        %ymm8,%ymm14,%ymm8
   DB  196,65,124,82,244                   ; vrsqrtps      %ymm12,%ymm14
@@ -5906,7 +5906,7 @@
   DB  197,4,194,255,2                     ; vcmpleps      %ymm7,%ymm15,%ymm15
   DB  196,67,13,74,240,240                ; vblendvps     %ymm15,%ymm8,%ymm14,%ymm14
   DB  197,116,88,249                      ; vaddps        %ymm1,%ymm1,%ymm15
-  DB  196,98,125,24,5,67,93,0,0           ; vbroadcastss  0x5d43(%rip),%ymm8        # 68a0 <_sk_callback_avx+0x178>
+  DB  196,98,125,24,5,123,91,0,0          ; vbroadcastss  0x5b7b(%rip),%ymm8        # 66d8 <_sk_callback_avx+0x178>
   DB  196,65,60,92,228                    ; vsubps        %ymm12,%ymm8,%ymm12
   DB  197,132,92,195                      ; vsubps        %ymm3,%ymm15,%ymm0
   DB  196,65,124,89,228                   ; vmulps        %ymm12,%ymm0,%ymm12
@@ -6033,12 +6033,12 @@
   DB  196,65,28,89,219                    ; vmulps        %ymm11,%ymm12,%ymm11
   DB  196,65,36,94,222                    ; vdivps        %ymm14,%ymm11,%ymm11
   DB  196,67,37,74,224,240                ; vblendvps     %ymm15,%ymm8,%ymm11,%ymm12
-  DB  196,98,125,24,53,13,91,0,0          ; vbroadcastss  0x5b0d(%rip),%ymm14        # 68ac <_sk_callback_avx+0x184>
+  DB  196,98,125,24,53,69,89,0,0          ; vbroadcastss  0x5945(%rip),%ymm14        # 66e4 <_sk_callback_avx+0x184>
   DB  196,65,92,89,222                    ; vmulps        %ymm14,%ymm4,%ymm11
-  DB  196,98,125,24,61,3,91,0,0           ; vbroadcastss  0x5b03(%rip),%ymm15        # 68b0 <_sk_callback_avx+0x188>
+  DB  196,98,125,24,61,59,89,0,0          ; vbroadcastss  0x593b(%rip),%ymm15        # 66e8 <_sk_callback_avx+0x188>
   DB  196,65,84,89,239                    ; vmulps        %ymm15,%ymm5,%ymm13
   DB  196,65,36,88,221                    ; vaddps        %ymm13,%ymm11,%ymm11
-  DB  196,226,125,24,5,244,90,0,0         ; vbroadcastss  0x5af4(%rip),%ymm0        # 68b4 <_sk_callback_avx+0x18c>
+  DB  196,226,125,24,5,44,89,0,0          ; vbroadcastss  0x592c(%rip),%ymm0        # 66ec <_sk_callback_avx+0x18c>
   DB  197,76,89,232                       ; vmulps        %ymm0,%ymm6,%ymm13
   DB  196,65,36,88,221                    ; vaddps        %ymm13,%ymm11,%ymm11
   DB  196,65,52,89,238                    ; vmulps        %ymm14,%ymm9,%ymm13
@@ -6099,7 +6099,7 @@
   DB  196,65,36,95,208                    ; vmaxps        %ymm8,%ymm11,%ymm10
   DB  196,195,109,74,209,240              ; vblendvps     %ymm15,%ymm9,%ymm2,%ymm2
   DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,205,89,0,0          ; vbroadcastss  0x59cd(%rip),%ymm8        # 68b8 <_sk_callback_avx+0x190>
+  DB  196,98,125,24,5,5,88,0,0            ; vbroadcastss  0x5805(%rip),%ymm8        # 66f0 <_sk_callback_avx+0x190>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,180,89,201                      ; vmulps        %ymm1,%ymm9,%ymm1
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -6156,12 +6156,12 @@
   DB  196,65,28,89,219                    ; vmulps        %ymm11,%ymm12,%ymm11
   DB  196,65,36,94,222                    ; vdivps        %ymm14,%ymm11,%ymm11
   DB  196,67,37,74,224,240                ; vblendvps     %ymm15,%ymm8,%ymm11,%ymm12
-  DB  196,98,125,24,53,213,88,0,0         ; vbroadcastss  0x58d5(%rip),%ymm14        # 68bc <_sk_callback_avx+0x194>
+  DB  196,98,125,24,53,13,87,0,0          ; vbroadcastss  0x570d(%rip),%ymm14        # 66f4 <_sk_callback_avx+0x194>
   DB  196,65,92,89,222                    ; vmulps        %ymm14,%ymm4,%ymm11
-  DB  196,98,125,24,61,203,88,0,0         ; vbroadcastss  0x58cb(%rip),%ymm15        # 68c0 <_sk_callback_avx+0x198>
+  DB  196,98,125,24,61,3,87,0,0           ; vbroadcastss  0x5703(%rip),%ymm15        # 66f8 <_sk_callback_avx+0x198>
   DB  196,65,84,89,239                    ; vmulps        %ymm15,%ymm5,%ymm13
   DB  196,65,36,88,221                    ; vaddps        %ymm13,%ymm11,%ymm11
-  DB  196,226,125,24,5,188,88,0,0         ; vbroadcastss  0x58bc(%rip),%ymm0        # 68c4 <_sk_callback_avx+0x19c>
+  DB  196,226,125,24,5,244,86,0,0         ; vbroadcastss  0x56f4(%rip),%ymm0        # 66fc <_sk_callback_avx+0x19c>
   DB  197,76,89,232                       ; vmulps        %ymm0,%ymm6,%ymm13
   DB  196,65,36,88,221                    ; vaddps        %ymm13,%ymm11,%ymm11
   DB  196,65,52,89,238                    ; vmulps        %ymm14,%ymm9,%ymm13
@@ -6222,7 +6222,7 @@
   DB  196,65,36,95,208                    ; vmaxps        %ymm8,%ymm11,%ymm10
   DB  196,195,109,74,209,240              ; vblendvps     %ymm15,%ymm9,%ymm2,%ymm2
   DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,149,87,0,0          ; vbroadcastss  0x5795(%rip),%ymm8        # 68c8 <_sk_callback_avx+0x1a0>
+  DB  196,98,125,24,5,205,85,0,0          ; vbroadcastss  0x55cd(%rip),%ymm8        # 6700 <_sk_callback_avx+0x1a0>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,180,89,201                      ; vmulps        %ymm1,%ymm9,%ymm1
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -6251,12 +6251,12 @@
   DB  197,252,17,68,36,32                 ; vmovups       %ymm0,0x20(%rsp)
   DB  197,124,89,199                      ; vmulps        %ymm7,%ymm0,%ymm8
   DB  197,116,89,207                      ; vmulps        %ymm7,%ymm1,%ymm9
-  DB  196,98,125,24,45,37,87,0,0          ; vbroadcastss  0x5725(%rip),%ymm13        # 68cc <_sk_callback_avx+0x1a4>
+  DB  196,98,125,24,45,93,85,0,0          ; vbroadcastss  0x555d(%rip),%ymm13        # 6704 <_sk_callback_avx+0x1a4>
   DB  196,65,92,89,213                    ; vmulps        %ymm13,%ymm4,%ymm10
-  DB  196,98,125,24,53,27,87,0,0          ; vbroadcastss  0x571b(%rip),%ymm14        # 68d0 <_sk_callback_avx+0x1a8>
+  DB  196,98,125,24,53,83,85,0,0          ; vbroadcastss  0x5553(%rip),%ymm14        # 6708 <_sk_callback_avx+0x1a8>
   DB  196,65,84,89,222                    ; vmulps        %ymm14,%ymm5,%ymm11
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
-  DB  196,98,125,24,61,12,87,0,0          ; vbroadcastss  0x570c(%rip),%ymm15        # 68d4 <_sk_callback_avx+0x1ac>
+  DB  196,98,125,24,61,68,85,0,0          ; vbroadcastss  0x5544(%rip),%ymm15        # 670c <_sk_callback_avx+0x1ac>
   DB  196,65,76,89,223                    ; vmulps        %ymm15,%ymm6,%ymm11
   DB  196,193,44,88,195                   ; vaddps        %ymm11,%ymm10,%ymm0
   DB  196,65,60,89,221                    ; vmulps        %ymm13,%ymm8,%ymm11
@@ -6319,7 +6319,7 @@
   DB  196,65,44,95,207                    ; vmaxps        %ymm15,%ymm10,%ymm9
   DB  196,195,37,74,192,0                 ; vblendvps     %ymm0,%ymm8,%ymm11,%ymm0
   DB  196,65,124,95,199                   ; vmaxps        %ymm15,%ymm0,%ymm8
-  DB  196,226,125,24,5,211,85,0,0         ; vbroadcastss  0x55d3(%rip),%ymm0        # 68d8 <_sk_callback_avx+0x1b0>
+  DB  196,226,125,24,5,11,84,0,0          ; vbroadcastss  0x540b(%rip),%ymm0        # 6710 <_sk_callback_avx+0x1b0>
   DB  197,124,92,215                      ; vsubps        %ymm7,%ymm0,%ymm10
   DB  197,172,89,84,36,32                 ; vmulps        0x20(%rsp),%ymm10,%ymm2
   DB  197,124,92,219                      ; vsubps        %ymm3,%ymm0,%ymm11
@@ -6349,12 +6349,12 @@
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
   DB  197,100,89,196                      ; vmulps        %ymm4,%ymm3,%ymm8
   DB  197,100,89,205                      ; vmulps        %ymm5,%ymm3,%ymm9
-  DB  196,98,125,24,45,95,85,0,0          ; vbroadcastss  0x555f(%rip),%ymm13        # 68dc <_sk_callback_avx+0x1b4>
+  DB  196,98,125,24,45,151,83,0,0         ; vbroadcastss  0x5397(%rip),%ymm13        # 6714 <_sk_callback_avx+0x1b4>
   DB  196,65,108,89,213                   ; vmulps        %ymm13,%ymm2,%ymm10
-  DB  196,98,125,24,53,85,85,0,0          ; vbroadcastss  0x5555(%rip),%ymm14        # 68e0 <_sk_callback_avx+0x1b8>
+  DB  196,98,125,24,53,141,83,0,0         ; vbroadcastss  0x538d(%rip),%ymm14        # 6718 <_sk_callback_avx+0x1b8>
   DB  196,65,116,89,222                   ; vmulps        %ymm14,%ymm1,%ymm11
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
-  DB  196,98,125,24,61,70,85,0,0          ; vbroadcastss  0x5546(%rip),%ymm15        # 68e4 <_sk_callback_avx+0x1bc>
+  DB  196,98,125,24,61,126,83,0,0         ; vbroadcastss  0x537e(%rip),%ymm15        # 671c <_sk_callback_avx+0x1bc>
   DB  196,65,28,89,223                    ; vmulps        %ymm15,%ymm12,%ymm11
   DB  196,193,44,88,195                   ; vaddps        %ymm11,%ymm10,%ymm0
   DB  196,65,60,89,221                    ; vmulps        %ymm13,%ymm8,%ymm11
@@ -6417,7 +6417,7 @@
   DB  196,65,44,95,207                    ; vmaxps        %ymm15,%ymm10,%ymm9
   DB  196,195,37,74,192,0                 ; vblendvps     %ymm0,%ymm8,%ymm11,%ymm0
   DB  196,65,124,95,199                   ; vmaxps        %ymm15,%ymm0,%ymm8
-  DB  196,226,125,24,5,13,84,0,0          ; vbroadcastss  0x540d(%rip),%ymm0        # 68e8 <_sk_callback_avx+0x1c0>
+  DB  196,226,125,24,5,69,82,0,0          ; vbroadcastss  0x5245(%rip),%ymm0        # 6720 <_sk_callback_avx+0x1c0>
   DB  197,124,92,215                      ; vsubps        %ymm7,%ymm0,%ymm10
   DB  197,172,89,210                      ; vmulps        %ymm2,%ymm10,%ymm2
   DB  197,124,92,219                      ; vsubps        %ymm3,%ymm0,%ymm11
@@ -6440,32 +6440,35 @@
 
 PUBLIC _sk_srcover_rgba_8888_avx
 _sk_srcover_rgba_8888_avx LABEL PROC
+  DB  72,131,236,16                       ; sub           $0x10,%rsp
+  DB  73,137,200                          ; mov           %rcx,%r8
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  76,139,16                           ; mov           (%rax),%r10
-  DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,46,1,0,0                     ; jne           1665 <_sk_srcover_rgba_8888_avx+0x13c>
-  DB  196,65,124,16,4,186                 ; vmovups       (%r10,%rdi,4),%ymm8
-  DB  197,124,40,13,59,87,0,0             ; vmovaps       0x573b(%rip),%ymm9        # 6c80 <_sk_callback_avx+0x558>
-  DB  196,193,60,84,225                   ; vandps        %ymm9,%ymm8,%ymm4
+  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
+  DB  76,3,8                              ; add           (%rax),%r9
+  DB  77,133,192                          ; test          %r8,%r8
+  DB  15,133,47,1,0,0                     ; jne           1675 <_sk_srcover_rgba_8888_avx+0x14c>
+  DB  196,193,124,16,57                   ; vmovups       (%r9),%ymm7
+  DB  197,124,40,13,13,86,0,0             ; vmovaps       0x560d(%rip),%ymm9        # 6b60 <_sk_callback_avx+0x600>
+  DB  196,193,68,84,225                   ; vandps        %ymm9,%ymm7,%ymm4
   DB  197,252,91,228                      ; vcvtdq2ps     %ymm4,%ymm4
-  DB  196,193,81,114,208,8                ; vpsrld        $0x8,%xmm8,%xmm5
-  DB  196,99,125,25,199,1                 ; vextractf128  $0x1,%ymm8,%xmm7
-  DB  197,201,114,215,8                   ; vpsrld        $0x8,%xmm7,%xmm6
+  DB  197,209,114,215,8                   ; vpsrld        $0x8,%xmm7,%xmm5
+  DB  196,195,125,25,248,1                ; vextractf128  $0x1,%ymm7,%xmm8
+  DB  196,193,73,114,208,8                ; vpsrld        $0x8,%xmm8,%xmm6
   DB  196,227,85,24,238,1                 ; vinsertf128   $0x1,%xmm6,%ymm5,%ymm5
   DB  196,193,84,84,233                   ; vandps        %ymm9,%ymm5,%ymm5
   DB  197,252,91,237                      ; vcvtdq2ps     %ymm5,%ymm5
-  DB  196,193,41,114,208,16               ; vpsrld        $0x10,%xmm8,%xmm10
-  DB  197,201,114,215,16                  ; vpsrld        $0x10,%xmm7,%xmm6
+  DB  197,169,114,215,16                  ; vpsrld        $0x10,%xmm7,%xmm10
+  DB  196,193,73,114,208,16               ; vpsrld        $0x10,%xmm8,%xmm6
   DB  196,227,45,24,246,1                 ; vinsertf128   $0x1,%xmm6,%ymm10,%ymm6
   DB  196,193,76,84,241                   ; vandps        %ymm9,%ymm6,%ymm6
   DB  197,252,91,246                      ; vcvtdq2ps     %ymm6,%ymm6
-  DB  196,193,57,114,208,24               ; vpsrld        $0x18,%xmm8,%xmm8
-  DB  197,193,114,215,24                  ; vpsrld        $0x18,%xmm7,%xmm7
-  DB  196,227,61,24,255,1                 ; vinsertf128   $0x1,%xmm7,%ymm8,%ymm7
+  DB  197,177,114,215,24                  ; vpsrld        $0x18,%xmm7,%xmm9
+  DB  196,193,65,114,208,24               ; vpsrld        $0x18,%xmm8,%xmm7
+  DB  196,227,53,24,255,1                 ; vinsertf128   $0x1,%xmm7,%ymm9,%ymm7
   DB  197,252,91,255                      ; vcvtdq2ps     %ymm7,%ymm7
-  DB  196,98,125,24,5,70,83,0,0           ; vbroadcastss  0x5346(%rip),%ymm8        # 68ec <_sk_callback_avx+0x1c4>
+  DB  196,98,125,24,5,112,81,0,0          ; vbroadcastss  0x5170(%rip),%ymm8        # 6724 <_sk_callback_avx+0x1c4>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
-  DB  196,98,125,24,13,61,83,0,0          ; vbroadcastss  0x533d(%rip),%ymm9        # 68f0 <_sk_callback_avx+0x1c8>
+  DB  196,98,125,24,13,103,81,0,0         ; vbroadcastss  0x5167(%rip),%ymm9        # 6728 <_sk_callback_avx+0x1c8>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  197,60,89,212                       ; vmulps        %ymm4,%ymm8,%ymm10
   DB  196,193,124,88,194                  ; vaddps        %ymm10,%ymm0,%ymm0
@@ -6497,96 +6500,41 @@
   DB  196,67,37,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm11,%ymm10
   DB  196,65,53,86,202                    ; vorpd         %ymm10,%ymm9,%ymm9
   DB  196,65,61,86,193                    ; vorpd         %ymm9,%ymm8,%ymm8
-  DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,183,0,0,0                    ; jne           1712 <_sk_srcover_rgba_8888_avx+0x1e9>
-  DB  196,65,124,17,4,186                 ; vmovups       %ymm8,(%r10,%rdi,4)
+  DB  77,133,192                          ; test          %r8,%r8
+  DB  117,91                              ; jne           16c0 <_sk_srcover_rgba_8888_avx+0x197>
+  DB  196,65,124,17,1                     ; vmovups       %ymm8,(%r9)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  76,137,193                          ; mov           %r8,%rcx
+  DB  72,131,196,16                       ; add           $0x10,%rsp
   DB  255,224                             ; jmpq          *%rax
-  DB  65,137,200                          ; mov           %ecx,%r8d
-  DB  65,128,224,7                        ; and           $0x7,%r8b
-  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
-  DB  65,254,200                          ; dec           %r8b
-  DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  15,135,191,254,255,255              ; ja            153d <_sk_srcover_rgba_8888_avx+0x14>
-  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,255,0,0,0                 ; lea           0xff(%rip),%r9        # 1788 <_sk_srcover_rgba_8888_avx+0x25f>
-  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
-  DB  76,1,200                            ; add           %r9,%rax
-  DB  255,224                             ; jmpq          *%rax
-  DB  196,193,121,110,100,186,24          ; vmovd         0x18(%r10,%rdi,4),%xmm4
-  DB  197,249,112,228,68                  ; vpshufd       $0x44,%xmm4,%xmm4
-  DB  196,227,125,24,228,1                ; vinsertf128   $0x1,%xmm4,%ymm0,%ymm4
-  DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
-  DB  196,99,85,12,196,64                 ; vblendps      $0x40,%ymm4,%ymm5,%ymm8
-  DB  196,99,125,25,196,1                 ; vextractf128  $0x1,%ymm8,%xmm4
-  DB  196,195,89,34,100,186,20,1          ; vpinsrd       $0x1,0x14(%r10,%rdi,4),%xmm4,%xmm4
-  DB  196,99,61,24,196,1                  ; vinsertf128   $0x1,%xmm4,%ymm8,%ymm8
-  DB  196,99,125,25,196,1                 ; vextractf128  $0x1,%ymm8,%xmm4
-  DB  196,195,89,34,100,186,16,0          ; vpinsrd       $0x0,0x10(%r10,%rdi,4),%xmm4,%xmm4
-  DB  196,99,61,24,196,1                  ; vinsertf128   $0x1,%xmm4,%ymm8,%ymm8
-  DB  196,195,57,34,100,186,12,3          ; vpinsrd       $0x3,0xc(%r10,%rdi,4),%xmm8,%xmm4
-  DB  196,99,61,12,196,15                 ; vblendps      $0xf,%ymm4,%ymm8,%ymm8
-  DB  196,195,57,34,100,186,8,2           ; vpinsrd       $0x2,0x8(%r10,%rdi,4),%xmm8,%xmm4
-  DB  196,99,61,12,196,15                 ; vblendps      $0xf,%ymm4,%ymm8,%ymm8
-  DB  196,195,57,34,100,186,4,1           ; vpinsrd       $0x1,0x4(%r10,%rdi,4),%xmm8,%xmm4
-  DB  196,99,61,12,196,15                 ; vblendps      $0xf,%ymm4,%ymm8,%ymm8
-  DB  196,195,57,34,36,186,0              ; vpinsrd       $0x0,(%r10,%rdi,4),%xmm8,%xmm4
-  DB  196,99,61,12,196,15                 ; vblendps      $0xf,%ymm4,%ymm8,%ymm8
-  DB  233,43,254,255,255                  ; jmpq          153d <_sk_srcover_rgba_8888_avx+0x14>
-  DB  65,137,200                          ; mov           %ecx,%r8d
-  DB  65,128,224,7                        ; and           $0x7,%r8b
-  DB  65,254,200                          ; dec           %r8b
-  DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  15,135,59,255,255,255               ; ja            1661 <_sk_srcover_rgba_8888_avx+0x138>
-  DB  65,15,182,192                       ; movzbl        %r8b,%eax
-  DB  76,141,5,115,0,0,0                  ; lea           0x73(%rip),%r8        # 17a4 <_sk_srcover_rgba_8888_avx+0x27b>
-  DB  73,99,4,128                         ; movslq        (%r8,%rax,4),%rax
-  DB  76,1,192                            ; add           %r8,%rax
-  DB  255,224                             ; jmpq          *%rax
-  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
-  DB  196,67,121,22,76,186,24,2           ; vpextrd       $0x2,%xmm9,0x18(%r10,%rdi,4)
-  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
-  DB  196,67,121,22,76,186,20,1           ; vpextrd       $0x1,%xmm9,0x14(%r10,%rdi,4)
-  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
-  DB  196,65,122,17,76,186,16             ; vmovss        %xmm9,0x10(%r10,%rdi,4)
-  DB  196,67,121,22,68,186,12,3           ; vpextrd       $0x3,%xmm8,0xc(%r10,%rdi,4)
-  DB  196,67,121,22,68,186,8,2            ; vpextrd       $0x2,%xmm8,0x8(%r10,%rdi,4)
-  DB  196,67,121,22,68,186,4,1            ; vpextrd       $0x1,%xmm8,0x4(%r10,%rdi,4)
-  DB  196,65,121,126,4,186                ; vmovd         %xmm8,(%r10,%rdi,4)
-  DB  233,219,254,255,255                 ; jmpq          1661 <_sk_srcover_rgba_8888_avx+0x138>
-  DB  102,144                             ; xchg          %ax,%ax
-  DB  120,255                             ; js            1789 <_sk_srcover_rgba_8888_avx+0x260>
-  DB  255                                 ; (bad)
-  DB  255,106,255                         ; ljmp          *-0x1(%rdx)
-  DB  255                                 ; (bad)
-  DB  255,92,255,255                      ; lcall         *-0x1(%rdi,%rdi,8)
-  DB  255,78,255                          ; decl          -0x1(%rsi)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  58,255                              ; cmp           %bh,%bh
-  DB  255                                 ; (bad)
-  DB  255,38                              ; jmpq          *(%rsi)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,10                              ; decl          (%rdx)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,215                             ; callq         *%rdi
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,207                             ; dec           %edi
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,199                             ; inc           %edi
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  191,255,255,255,178                 ; mov           $0xb2ffffff,%edi
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,164,255,255,255,150,255         ; jmpq          *-0x690001(%rdi,%rdi,8)
-  DB  255                                 ; (bad)
-  DB  255                                 ; .byte         0xff
+  DB  185,8,0,0,0                         ; mov           $0x8,%ecx
+  DB  68,41,193                           ; sub           %r8d,%ecx
+  DB  192,225,3                           ; shl           $0x3,%cl
+  DB  72,199,192,255,255,255,255          ; mov           $0xffffffffffffffff,%rax
+  DB  72,211,232                          ; shr           %cl,%rax
+  DB  196,225,249,110,224                 ; vmovq         %rax,%xmm4
+  DB  196,226,121,48,228                  ; vpmovzxbw     %xmm4,%xmm4
+  DB  196,226,89,0,45,19,84,0,0           ; vpshufb       0x5413(%rip),%xmm4,%xmm5        # 6ab0 <_sk_callback_avx+0x550>
+  DB  196,226,121,33,237                  ; vpmovsxbd     %xmm5,%xmm5
+  DB  196,226,89,0,37,21,84,0,0           ; vpshufb       0x5415(%rip),%xmm4,%xmm4        # 6ac0 <_sk_callback_avx+0x560>
+  DB  196,226,121,33,228                  ; vpmovsxbd     %xmm4,%xmm4
+  DB  196,227,85,24,228,1                 ; vinsertf128   $0x1,%xmm4,%ymm5,%ymm4
+  DB  196,194,93,44,57                    ; vmaskmovps    (%r9),%ymm4,%ymm7
+  DB  233,139,254,255,255                 ; jmpq          154b <_sk_srcover_rgba_8888_avx+0x22>
+  DB  185,8,0,0,0                         ; mov           $0x8,%ecx
+  DB  68,41,193                           ; sub           %r8d,%ecx
+  DB  192,225,3                           ; shl           $0x3,%cl
+  DB  72,199,192,255,255,255,255          ; mov           $0xffffffffffffffff,%rax
+  DB  72,211,232                          ; shr           %cl,%rax
+  DB  196,97,249,110,200                  ; vmovq         %rax,%xmm9
+  DB  196,66,121,48,201                   ; vpmovzxbw     %xmm9,%xmm9
+  DB  196,98,49,0,21,200,83,0,0           ; vpshufb       0x53c8(%rip),%xmm9,%xmm10        # 6ab0 <_sk_callback_avx+0x550>
+  DB  196,66,121,33,210                   ; vpmovsxbd     %xmm10,%xmm10
+  DB  196,98,49,0,13,202,83,0,0           ; vpshufb       0x53ca(%rip),%xmm9,%xmm9        # 6ac0 <_sk_callback_avx+0x560>
+  DB  196,66,121,33,201                   ; vpmovsxbd     %xmm9,%xmm9
+  DB  196,67,45,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
+  DB  196,66,53,46,1                      ; vmaskmovps    %ymm8,%ymm9,(%r9)
+  DB  233,95,255,255,255                  ; jmpq          166a <_sk_srcover_rgba_8888_avx+0x141>
 
 PUBLIC _sk_clamp_0_avx
 _sk_clamp_0_avx LABEL PROC
@@ -6600,7 +6548,7 @@
 
 PUBLIC _sk_clamp_1_avx
 _sk_clamp_1_avx LABEL PROC
-  DB  196,98,125,24,5,14,81,0,0           ; vbroadcastss  0x510e(%rip),%ymm8        # 68f4 <_sk_callback_avx+0x1cc>
+  DB  196,98,125,24,5,251,79,0,0          ; vbroadcastss  0x4ffb(%rip),%ymm8        # 672c <_sk_callback_avx+0x1cc>
   DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
   DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
   DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
@@ -6610,7 +6558,7 @@
 
 PUBLIC _sk_clamp_a_avx
 _sk_clamp_a_avx LABEL PROC
-  DB  196,98,125,24,5,241,80,0,0          ; vbroadcastss  0x50f1(%rip),%ymm8        # 68f8 <_sk_callback_avx+0x1d0>
+  DB  196,98,125,24,5,222,79,0,0          ; vbroadcastss  0x4fde(%rip),%ymm8        # 6730 <_sk_callback_avx+0x1d0>
   DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
   DB  197,252,93,195                      ; vminps        %ymm3,%ymm0,%ymm0
   DB  197,244,93,203                      ; vminps        %ymm3,%ymm1,%ymm1
@@ -6682,7 +6630,7 @@
 _sk_unpremul_avx LABEL PROC
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,65,100,194,200,0                ; vcmpeqps      %ymm8,%ymm3,%ymm9
-  DB  196,98,125,24,21,57,80,0,0          ; vbroadcastss  0x5039(%rip),%ymm10        # 68fc <_sk_callback_avx+0x1d4>
+  DB  196,98,125,24,21,38,79,0,0          ; vbroadcastss  0x4f26(%rip),%ymm10        # 6734 <_sk_callback_avx+0x1d4>
   DB  197,44,94,211                       ; vdivps        %ymm3,%ymm10,%ymm10
   DB  196,67,45,74,192,144                ; vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
@@ -6693,17 +6641,17 @@
 
 PUBLIC _sk_from_srgb_avx
 _sk_from_srgb_avx LABEL PROC
-  DB  196,98,125,24,5,26,80,0,0           ; vbroadcastss  0x501a(%rip),%ymm8        # 6900 <_sk_callback_avx+0x1d8>
+  DB  196,98,125,24,5,7,79,0,0            ; vbroadcastss  0x4f07(%rip),%ymm8        # 6738 <_sk_callback_avx+0x1d8>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  197,124,89,208                      ; vmulps        %ymm0,%ymm0,%ymm10
-  DB  196,98,125,24,29,12,80,0,0          ; vbroadcastss  0x500c(%rip),%ymm11        # 6904 <_sk_callback_avx+0x1dc>
+  DB  196,98,125,24,29,249,78,0,0         ; vbroadcastss  0x4ef9(%rip),%ymm11        # 673c <_sk_callback_avx+0x1dc>
   DB  196,65,124,89,227                   ; vmulps        %ymm11,%ymm0,%ymm12
-  DB  196,98,125,24,45,2,80,0,0           ; vbroadcastss  0x5002(%rip),%ymm13        # 6908 <_sk_callback_avx+0x1e0>
+  DB  196,98,125,24,45,239,78,0,0         ; vbroadcastss  0x4eef(%rip),%ymm13        # 6740 <_sk_callback_avx+0x1e0>
   DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
   DB  196,65,44,89,212                    ; vmulps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,37,243,79,0,0         ; vbroadcastss  0x4ff3(%rip),%ymm12        # 690c <_sk_callback_avx+0x1e4>
+  DB  196,98,125,24,37,224,78,0,0         ; vbroadcastss  0x4ee0(%rip),%ymm12        # 6744 <_sk_callback_avx+0x1e4>
   DB  196,65,44,88,212                    ; vaddps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,53,233,79,0,0         ; vbroadcastss  0x4fe9(%rip),%ymm14        # 6910 <_sk_callback_avx+0x1e8>
+  DB  196,98,125,24,53,214,78,0,0         ; vbroadcastss  0x4ed6(%rip),%ymm14        # 6748 <_sk_callback_avx+0x1e8>
   DB  196,193,124,194,198,1               ; vcmpltps      %ymm14,%ymm0,%ymm0
   DB  196,195,45,74,193,0                 ; vblendvps     %ymm0,%ymm9,%ymm10,%ymm0
   DB  196,65,116,89,200                   ; vmulps        %ymm8,%ymm1,%ymm9
@@ -6728,20 +6676,20 @@
 PUBLIC _sk_to_srgb_avx
 _sk_to_srgb_avx LABEL PROC
   DB  197,124,82,200                      ; vrsqrtps      %ymm0,%ymm9
-  DB  196,98,125,24,5,126,79,0,0          ; vbroadcastss  0x4f7e(%rip),%ymm8        # 6914 <_sk_callback_avx+0x1ec>
+  DB  196,98,125,24,5,107,78,0,0          ; vbroadcastss  0x4e6b(%rip),%ymm8        # 674c <_sk_callback_avx+0x1ec>
   DB  196,65,124,89,208                   ; vmulps        %ymm8,%ymm0,%ymm10
-  DB  196,98,125,24,29,116,79,0,0         ; vbroadcastss  0x4f74(%rip),%ymm11        # 6918 <_sk_callback_avx+0x1f0>
+  DB  196,98,125,24,29,97,78,0,0          ; vbroadcastss  0x4e61(%rip),%ymm11        # 6750 <_sk_callback_avx+0x1f0>
   DB  196,65,52,89,227                    ; vmulps        %ymm11,%ymm9,%ymm12
-  DB  196,98,125,24,45,106,79,0,0         ; vbroadcastss  0x4f6a(%rip),%ymm13        # 691c <_sk_callback_avx+0x1f4>
+  DB  196,98,125,24,45,87,78,0,0          ; vbroadcastss  0x4e57(%rip),%ymm13        # 6754 <_sk_callback_avx+0x1f4>
   DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
   DB  196,65,52,89,228                    ; vmulps        %ymm12,%ymm9,%ymm12
-  DB  196,98,125,24,53,91,79,0,0          ; vbroadcastss  0x4f5b(%rip),%ymm14        # 6920 <_sk_callback_avx+0x1f8>
+  DB  196,98,125,24,53,72,78,0,0          ; vbroadcastss  0x4e48(%rip),%ymm14        # 6758 <_sk_callback_avx+0x1f8>
   DB  196,65,28,88,230                    ; vaddps        %ymm14,%ymm12,%ymm12
-  DB  196,98,125,24,61,81,79,0,0          ; vbroadcastss  0x4f51(%rip),%ymm15        # 6924 <_sk_callback_avx+0x1fc>
+  DB  196,98,125,24,61,62,78,0,0          ; vbroadcastss  0x4e3e(%rip),%ymm15        # 675c <_sk_callback_avx+0x1fc>
   DB  196,65,52,88,207                    ; vaddps        %ymm15,%ymm9,%ymm9
   DB  196,65,124,83,201                   ; vrcpps        %ymm9,%ymm9
   DB  196,65,52,89,204                    ; vmulps        %ymm12,%ymm9,%ymm9
-  DB  196,98,125,24,37,61,79,0,0          ; vbroadcastss  0x4f3d(%rip),%ymm12        # 6928 <_sk_callback_avx+0x200>
+  DB  196,98,125,24,37,42,78,0,0          ; vbroadcastss  0x4e2a(%rip),%ymm12        # 6760 <_sk_callback_avx+0x200>
   DB  196,193,124,194,196,1               ; vcmpltps      %ymm12,%ymm0,%ymm0
   DB  196,195,53,74,194,0                 ; vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   DB  197,124,82,201                      ; vrsqrtps      %ymm1,%ymm9
@@ -6776,7 +6724,7 @@
   DB  197,124,93,201                      ; vminps        %ymm1,%ymm0,%ymm9
   DB  197,52,93,202                       ; vminps        %ymm2,%ymm9,%ymm9
   DB  196,65,60,92,209                    ; vsubps        %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,29,163,78,0,0         ; vbroadcastss  0x4ea3(%rip),%ymm11        # 692c <_sk_callback_avx+0x204>
+  DB  196,98,125,24,29,144,77,0,0         ; vbroadcastss  0x4d90(%rip),%ymm11        # 6764 <_sk_callback_avx+0x204>
   DB  196,65,36,94,218                    ; vdivps        %ymm10,%ymm11,%ymm11
   DB  197,116,92,226                      ; vsubps        %ymm2,%ymm1,%ymm12
   DB  196,65,28,89,227                    ; vmulps        %ymm11,%ymm12,%ymm12
@@ -6786,19 +6734,19 @@
   DB  196,193,108,89,211                  ; vmulps        %ymm11,%ymm2,%ymm2
   DB  197,252,92,201                      ; vsubps        %ymm1,%ymm0,%ymm1
   DB  196,193,116,89,203                  ; vmulps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,124,78,0,0         ; vbroadcastss  0x4e7c(%rip),%ymm11        # 6938 <_sk_callback_avx+0x210>
+  DB  196,98,125,24,29,105,77,0,0         ; vbroadcastss  0x4d69(%rip),%ymm11        # 6770 <_sk_callback_avx+0x210>
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,106,78,0,0         ; vbroadcastss  0x4e6a(%rip),%ymm11        # 6934 <_sk_callback_avx+0x20c>
+  DB  196,98,125,24,29,87,77,0,0          ; vbroadcastss  0x4d57(%rip),%ymm11        # 676c <_sk_callback_avx+0x20c>
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
   DB  196,227,117,74,202,224              ; vblendvps     %ymm14,%ymm2,%ymm1,%ymm1
-  DB  196,226,125,24,21,82,78,0,0         ; vbroadcastss  0x4e52(%rip),%ymm2        # 6930 <_sk_callback_avx+0x208>
+  DB  196,226,125,24,21,63,77,0,0         ; vbroadcastss  0x4d3f(%rip),%ymm2        # 6768 <_sk_callback_avx+0x208>
   DB  196,65,12,87,246                    ; vxorps        %ymm14,%ymm14,%ymm14
   DB  196,227,13,74,210,208               ; vblendvps     %ymm13,%ymm2,%ymm14,%ymm2
   DB  197,188,194,192,0                   ; vcmpeqps      %ymm0,%ymm8,%ymm0
   DB  196,193,108,88,212                  ; vaddps        %ymm12,%ymm2,%ymm2
   DB  196,227,117,74,194,0                ; vblendvps     %ymm0,%ymm2,%ymm1,%ymm0
   DB  196,193,60,88,201                   ; vaddps        %ymm9,%ymm8,%ymm1
-  DB  196,98,125,24,37,57,78,0,0          ; vbroadcastss  0x4e39(%rip),%ymm12        # 6940 <_sk_callback_avx+0x218>
+  DB  196,98,125,24,37,38,77,0,0          ; vbroadcastss  0x4d26(%rip),%ymm12        # 6778 <_sk_callback_avx+0x218>
   DB  196,193,116,89,212                  ; vmulps        %ymm12,%ymm1,%ymm2
   DB  197,28,194,226,1                    ; vcmpltps      %ymm2,%ymm12,%ymm12
   DB  196,65,36,92,216                    ; vsubps        %ymm8,%ymm11,%ymm11
@@ -6808,7 +6756,7 @@
   DB  197,172,94,201                      ; vdivps        %ymm1,%ymm10,%ymm1
   DB  196,195,125,74,198,128              ; vblendvps     %ymm8,%ymm14,%ymm0,%ymm0
   DB  196,195,117,74,206,128              ; vblendvps     %ymm8,%ymm14,%ymm1,%ymm1
-  DB  196,98,125,24,5,252,77,0,0          ; vbroadcastss  0x4dfc(%rip),%ymm8        # 693c <_sk_callback_avx+0x214>
+  DB  196,98,125,24,5,233,76,0,0          ; vbroadcastss  0x4ce9(%rip),%ymm8        # 6774 <_sk_callback_avx+0x214>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -6823,7 +6771,7 @@
   DB  197,252,17,28,36                    ; vmovups       %ymm3,(%rsp)
   DB  197,252,40,225                      ; vmovaps       %ymm1,%ymm4
   DB  197,252,40,216                      ; vmovaps       %ymm0,%ymm3
-  DB  196,98,125,24,5,195,77,0,0          ; vbroadcastss  0x4dc3(%rip),%ymm8        # 6944 <_sk_callback_avx+0x21c>
+  DB  196,98,125,24,5,176,76,0,0          ; vbroadcastss  0x4cb0(%rip),%ymm8        # 677c <_sk_callback_avx+0x21c>
   DB  197,60,194,202,2                    ; vcmpleps      %ymm2,%ymm8,%ymm9
   DB  197,92,89,210                       ; vmulps        %ymm2,%ymm4,%ymm10
   DB  196,65,92,92,218                    ; vsubps        %ymm10,%ymm4,%ymm11
@@ -6831,23 +6779,23 @@
   DB  197,52,88,210                       ; vaddps        %ymm2,%ymm9,%ymm10
   DB  197,108,88,202                      ; vaddps        %ymm2,%ymm2,%ymm9
   DB  196,65,52,92,202                    ; vsubps        %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,29,157,77,0,0         ; vbroadcastss  0x4d9d(%rip),%ymm11        # 6948 <_sk_callback_avx+0x220>
+  DB  196,98,125,24,29,138,76,0,0         ; vbroadcastss  0x4c8a(%rip),%ymm11        # 6780 <_sk_callback_avx+0x220>
   DB  196,65,100,88,219                   ; vaddps        %ymm11,%ymm3,%ymm11
   DB  196,67,125,8,227,1                  ; vroundps      $0x1,%ymm11,%ymm12
   DB  196,65,36,92,252                    ; vsubps        %ymm12,%ymm11,%ymm15
   DB  196,65,44,92,217                    ; vsubps        %ymm9,%ymm10,%ymm11
-  DB  196,98,125,24,37,135,77,0,0         ; vbroadcastss  0x4d87(%rip),%ymm12        # 6950 <_sk_callback_avx+0x228>
+  DB  196,98,125,24,37,116,76,0,0         ; vbroadcastss  0x4c74(%rip),%ymm12        # 6788 <_sk_callback_avx+0x228>
   DB  196,193,4,89,196                    ; vmulps        %ymm12,%ymm15,%ymm0
-  DB  196,98,125,24,45,125,77,0,0         ; vbroadcastss  0x4d7d(%rip),%ymm13        # 6954 <_sk_callback_avx+0x22c>
+  DB  196,98,125,24,45,106,76,0,0         ; vbroadcastss  0x4c6a(%rip),%ymm13        # 678c <_sk_callback_avx+0x22c>
   DB  197,20,92,240                       ; vsubps        %ymm0,%ymm13,%ymm14
   DB  196,65,36,89,246                    ; vmulps        %ymm14,%ymm11,%ymm14
   DB  196,65,52,88,246                    ; vaddps        %ymm14,%ymm9,%ymm14
-  DB  196,226,125,24,13,94,77,0,0         ; vbroadcastss  0x4d5e(%rip),%ymm1        # 694c <_sk_callback_avx+0x224>
+  DB  196,226,125,24,13,75,76,0,0         ; vbroadcastss  0x4c4b(%rip),%ymm1        # 6784 <_sk_callback_avx+0x224>
   DB  196,193,116,194,255,2               ; vcmpleps      %ymm15,%ymm1,%ymm7
   DB  196,195,13,74,249,112               ; vblendvps     %ymm7,%ymm9,%ymm14,%ymm7
   DB  196,65,60,194,247,2                 ; vcmpleps      %ymm15,%ymm8,%ymm14
   DB  196,227,45,74,255,224               ; vblendvps     %ymm14,%ymm7,%ymm10,%ymm7
-  DB  196,98,125,24,53,73,77,0,0          ; vbroadcastss  0x4d49(%rip),%ymm14        # 6958 <_sk_callback_avx+0x230>
+  DB  196,98,125,24,53,54,76,0,0          ; vbroadcastss  0x4c36(%rip),%ymm14        # 6790 <_sk_callback_avx+0x230>
   DB  196,65,12,194,255,2                 ; vcmpleps      %ymm15,%ymm14,%ymm15
   DB  196,193,124,89,195                  ; vmulps        %ymm11,%ymm0,%ymm0
   DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
@@ -6866,7 +6814,7 @@
   DB  197,164,89,247                      ; vmulps        %ymm7,%ymm11,%ymm6
   DB  197,180,88,246                      ; vaddps        %ymm6,%ymm9,%ymm6
   DB  196,227,77,74,237,0                 ; vblendvps     %ymm0,%ymm5,%ymm6,%ymm5
-  DB  196,226,125,24,5,235,76,0,0         ; vbroadcastss  0x4ceb(%rip),%ymm0        # 695c <_sk_callback_avx+0x234>
+  DB  196,226,125,24,5,216,75,0,0         ; vbroadcastss  0x4bd8(%rip),%ymm0        # 6794 <_sk_callback_avx+0x234>
   DB  197,228,88,192                      ; vaddps        %ymm0,%ymm3,%ymm0
   DB  196,227,125,8,216,1                 ; vroundps      $0x1,%ymm0,%ymm3
   DB  197,252,92,195                      ; vsubps        %ymm3,%ymm0,%ymm0
@@ -6914,14 +6862,14 @@
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,1,248                            ; add           %rdi,%rax
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,68                              ; jne           1d6e <_sk_scale_u8_avx+0x54>
+  DB  117,68                              ; jne           1cb9 <_sk_scale_u8_avx+0x54>
   DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
   DB  196,66,121,49,200                   ; vpmovzxbd     %xmm8,%xmm9
   DB  196,67,121,4,192,229                ; vpermilps     $0xe5,%xmm8,%xmm8
   DB  196,66,121,49,192                   ; vpmovzxbd     %xmm8,%xmm8
   DB  196,67,53,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,14,76,0,0          ; vbroadcastss  0x4c0e(%rip),%ymm9        # 6960 <_sk_callback_avx+0x238>
+  DB  196,98,125,24,13,251,74,0,0         ; vbroadcastss  0x4afb(%rip),%ymm9        # 6798 <_sk_callback_avx+0x238>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
@@ -6939,9 +6887,9 @@
   DB  77,9,217                            ; or            %r11,%r9
   DB  72,131,193,8                        ; add           $0x8,%rcx
   DB  73,255,202                          ; dec           %r10
-  DB  117,234                             ; jne           1d76 <_sk_scale_u8_avx+0x5c>
+  DB  117,234                             ; jne           1cc1 <_sk_scale_u8_avx+0x5c>
   DB  196,65,249,110,193                  ; vmovq         %r9,%xmm8
-  DB  235,155                             ; jmp           1d2e <_sk_scale_u8_avx+0x14>
+  DB  235,155                             ; jmp           1c79 <_sk_scale_u8_avx+0x14>
 
 PUBLIC _sk_lerp_1_float_avx
 _sk_lerp_1_float_avx LABEL PROC
@@ -6969,14 +6917,14 @@
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,1,248                            ; add           %rdi,%rax
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,104                             ; jne           1e4a <_sk_lerp_u8_avx+0x78>
+  DB  117,104                             ; jne           1d95 <_sk_lerp_u8_avx+0x78>
   DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
   DB  196,66,121,49,200                   ; vpmovzxbd     %xmm8,%xmm9
   DB  196,67,121,4,192,229                ; vpermilps     $0xe5,%xmm8,%xmm8
   DB  196,66,121,49,192                   ; vpmovzxbd     %xmm8,%xmm8
   DB  196,67,53,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,90,75,0,0          ; vbroadcastss  0x4b5a(%rip),%ymm9        # 6964 <_sk_callback_avx+0x23c>
+  DB  196,98,125,24,13,71,74,0,0          ; vbroadcastss  0x4a47(%rip),%ymm9        # 679c <_sk_callback_avx+0x23c>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
@@ -7002,35 +6950,35 @@
   DB  77,9,217                            ; or            %r11,%r9
   DB  72,131,193,8                        ; add           $0x8,%rcx
   DB  73,255,202                          ; dec           %r10
-  DB  117,234                             ; jne           1e52 <_sk_lerp_u8_avx+0x80>
+  DB  117,234                             ; jne           1d9d <_sk_lerp_u8_avx+0x80>
   DB  196,65,249,110,193                  ; vmovq         %r9,%xmm8
-  DB  233,116,255,255,255                 ; jmpq          1de6 <_sk_lerp_u8_avx+0x14>
+  DB  233,116,255,255,255                 ; jmpq          1d31 <_sk_lerp_u8_avx+0x14>
 
 PUBLIC _sk_lerp_565_avx
 _sk_lerp_565_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,208,0,0,0                    ; jne           1f50 <_sk_lerp_565_avx+0xde>
+  DB  15,133,208,0,0,0                    ; jne           1e9b <_sk_lerp_565_avx+0xde>
   DB  196,65,122,111,4,122                ; vmovdqu       (%r10,%rdi,2),%xmm8
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  196,65,57,105,201                   ; vpunpckhwd    %xmm9,%xmm8,%xmm9
   DB  196,66,121,51,192                   ; vpmovzxwd     %xmm8,%xmm8
   DB  196,67,61,24,193,1                  ; vinsertf128   $0x1,%xmm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,196,74,0,0         ; vbroadcastss  0x4ac4(%rip),%ymm9        # 6968 <_sk_callback_avx+0x240>
+  DB  196,98,125,24,13,177,73,0,0         ; vbroadcastss  0x49b1(%rip),%ymm9        # 67a0 <_sk_callback_avx+0x240>
   DB  196,65,60,84,201                    ; vandps        %ymm9,%ymm8,%ymm9
   DB  196,65,124,91,201                   ; vcvtdq2ps     %ymm9,%ymm9
-  DB  196,98,125,24,21,181,74,0,0         ; vbroadcastss  0x4ab5(%rip),%ymm10        # 696c <_sk_callback_avx+0x244>
+  DB  196,98,125,24,21,162,73,0,0         ; vbroadcastss  0x49a2(%rip),%ymm10        # 67a4 <_sk_callback_avx+0x244>
   DB  196,65,52,89,202                    ; vmulps        %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,21,171,74,0,0         ; vbroadcastss  0x4aab(%rip),%ymm10        # 6970 <_sk_callback_avx+0x248>
+  DB  196,98,125,24,21,152,73,0,0         ; vbroadcastss  0x4998(%rip),%ymm10        # 67a8 <_sk_callback_avx+0x248>
   DB  196,65,60,84,210                    ; vandps        %ymm10,%ymm8,%ymm10
   DB  196,65,124,91,210                   ; vcvtdq2ps     %ymm10,%ymm10
-  DB  196,98,125,24,29,156,74,0,0         ; vbroadcastss  0x4a9c(%rip),%ymm11        # 6974 <_sk_callback_avx+0x24c>
+  DB  196,98,125,24,29,137,73,0,0         ; vbroadcastss  0x4989(%rip),%ymm11        # 67ac <_sk_callback_avx+0x24c>
   DB  196,65,44,89,211                    ; vmulps        %ymm11,%ymm10,%ymm10
-  DB  196,98,125,24,29,146,74,0,0         ; vbroadcastss  0x4a92(%rip),%ymm11        # 6978 <_sk_callback_avx+0x250>
+  DB  196,98,125,24,29,127,73,0,0         ; vbroadcastss  0x497f(%rip),%ymm11        # 67b0 <_sk_callback_avx+0x250>
   DB  196,65,60,84,195                    ; vandps        %ymm11,%ymm8,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,29,131,74,0,0         ; vbroadcastss  0x4a83(%rip),%ymm11        # 697c <_sk_callback_avx+0x254>
+  DB  196,98,125,24,29,112,73,0,0         ; vbroadcastss  0x4970(%rip),%ymm11        # 67b4 <_sk_callback_avx+0x254>
   DB  196,65,60,89,195                    ; vmulps        %ymm11,%ymm8,%ymm8
   DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
@@ -7057,9 +7005,9 @@
   DB  196,65,57,239,192                   ; vpxor         %xmm8,%xmm8,%xmm8
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  15,135,29,255,255,255               ; ja            1e86 <_sk_lerp_565_avx+0x14>
+  DB  15,135,29,255,255,255               ; ja            1dd1 <_sk_lerp_565_avx+0x14>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,76,0,0,0                  ; lea           0x4c(%rip),%r9        # 1fc0 <_sk_lerp_565_avx+0x14e>
+  DB  76,141,13,77,0,0,0                  ; lea           0x4d(%rip),%r9        # 1f0c <_sk_lerp_565_avx+0x14f>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7071,47 +7019,48 @@
   DB  196,65,57,196,68,122,4,2            ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm8,%xmm8
   DB  196,65,57,196,68,122,2,1            ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm8,%xmm8
   DB  196,65,57,196,4,122,0               ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm8,%xmm8
-  DB  233,200,254,255,255                 ; jmpq          1e86 <_sk_lerp_565_avx+0x14>
-  DB  102,144                             ; xchg          %ax,%ax
-  DB  242,255                             ; repnz         (bad)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  234                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,226                             ; jmpq          *%rdx
+  DB  233,200,254,255,255                 ; jmpq          1dd1 <_sk_lerp_565_avx+0x14>
+  DB  15,31,0                             ; nopl          (%rax)
+  DB  241                                 ; icebp
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  218,255                             ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,210                             ; callq         *%rdx
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,202                             ; dec           %edx
+  DB  233,255,255,255,225                 ; jmpq          ffffffffe2001f14 <_sk_callback_avx+0xffffffffe1ffb9b4>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  189                                 ; .byte         0xbd
+  DB  217,255                             ; fcos
+  DB  255                                 ; (bad)
+  DB  255,209                             ; callq         *%rcx
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255,201                             ; dec           %ecx
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  188                                 ; .byte         0xbc
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; .byte         0xff
 
 PUBLIC _sk_load_tables_avx
 _sk_load_tables_avx LABEL PROC
+  DB  73,137,200                          ; mov           %rcx,%r8
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  76,139,0                            ; mov           (%rax),%r8
-  DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,26,2,0,0                     ; jne           2204 <_sk_load_tables_avx+0x228>
-  DB  196,65,124,16,4,184                 ; vmovups       (%r8,%rdi,4),%ymm8
+  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
+  DB  76,3,8                              ; add           (%rax),%r9
+  DB  77,133,192                          ; test          %r8,%r8
+  DB  15,133,31,2,0,0                     ; jne           2160 <_sk_load_tables_avx+0x238>
+  DB  196,65,124,16,17                    ; vmovups       (%r9),%ymm10
   DB  85                                  ; push          %rbp
   DB  65,87                               ; push          %r15
   DB  65,86                               ; push          %r14
   DB  65,85                               ; push          %r13
   DB  65,84                               ; push          %r12
   DB  83                                  ; push          %rbx
-  DB  197,124,40,13,158,76,0,0            ; vmovaps       0x4c9e(%rip),%ymm9        # 6ca0 <_sk_callback_avx+0x578>
-  DB  196,193,60,84,193                   ; vandps        %ymm9,%ymm8,%ymm0
+  DB  80                                  ; push          %rax
+  DB  197,124,40,13,39,76,0,0             ; vmovaps       0x4c27(%rip),%ymm9        # 6b80 <_sk_callback_avx+0x620>
+  DB  196,193,44,84,193                   ; vandps        %ymm9,%ymm10,%ymm0
   DB  196,193,249,126,193                 ; vmovq         %xmm0,%r9
   DB  69,137,203                          ; mov           %r9d,%r11d
   DB  196,195,249,22,194,1                ; vpextrq       $0x1,%xmm0,%r10
@@ -7119,26 +7068,26 @@
   DB  73,193,234,32                       ; shr           $0x20,%r10
   DB  73,193,233,32                       ; shr           $0x20,%r9
   DB  196,227,125,25,192,1                ; vextractf128  $0x1,%ymm0,%xmm0
-  DB  196,193,249,126,196                 ; vmovq         %xmm0,%r12
-  DB  69,137,231                          ; mov           %r12d,%r15d
-  DB  196,227,249,22,195,1                ; vpextrq       $0x1,%xmm0,%rbx
-  DB  65,137,221                          ; mov           %ebx,%r13d
+  DB  196,225,249,126,195                 ; vmovq         %xmm0,%rbx
+  DB  65,137,223                          ; mov           %ebx,%r15d
+  DB  196,227,249,22,193,1                ; vpextrq       $0x1,%xmm0,%rcx
+  DB  65,137,205                          ; mov           %ecx,%r13d
+  DB  72,193,233,32                       ; shr           $0x20,%rcx
   DB  72,193,235,32                       ; shr           $0x20,%rbx
-  DB  73,193,236,32                       ; shr           $0x20,%r12
   DB  72,139,104,8                        ; mov           0x8(%rax),%rbp
-  DB  76,139,64,16                        ; mov           0x10(%rax),%r8
+  DB  76,139,96,16                        ; mov           0x10(%rax),%r12
   DB  196,161,122,16,68,189,0             ; vmovss        0x0(%rbp,%r15,4),%xmm0
-  DB  196,163,121,33,68,165,0,16          ; vinsertps     $0x10,0x0(%rbp,%r12,4),%xmm0,%xmm0
+  DB  196,227,121,33,68,157,0,16          ; vinsertps     $0x10,0x0(%rbp,%rbx,4),%xmm0,%xmm0
   DB  196,163,121,33,68,173,0,32          ; vinsertps     $0x20,0x0(%rbp,%r13,4),%xmm0,%xmm0
-  DB  196,227,121,33,68,157,0,48          ; vinsertps     $0x30,0x0(%rbp,%rbx,4),%xmm0,%xmm0
+  DB  196,227,121,33,68,141,0,48          ; vinsertps     $0x30,0x0(%rbp,%rcx,4),%xmm0,%xmm0
   DB  196,161,122,16,76,157,0             ; vmovss        0x0(%rbp,%r11,4),%xmm1
   DB  196,163,113,33,76,141,0,16          ; vinsertps     $0x10,0x0(%rbp,%r9,4),%xmm1,%xmm1
   DB  196,163,113,33,76,181,0,32          ; vinsertps     $0x20,0x0(%rbp,%r14,4),%xmm1,%xmm1
   DB  196,163,113,33,76,149,0,48          ; vinsertps     $0x30,0x0(%rbp,%r10,4),%xmm1,%xmm1
   DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
-  DB  196,193,113,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm1
-  DB  196,67,125,25,194,1                 ; vextractf128  $0x1,%ymm8,%xmm10
-  DB  196,193,105,114,210,8               ; vpsrld        $0x8,%xmm10,%xmm2
+  DB  196,193,113,114,210,8               ; vpsrld        $0x8,%xmm10,%xmm1
+  DB  196,67,125,25,208,1                 ; vextractf128  $0x1,%ymm10,%xmm8
+  DB  196,193,105,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm2
   DB  196,227,117,24,202,1                ; vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   DB  196,193,116,84,201                  ; vandps        %ymm9,%ymm1,%ymm1
   DB  196,193,249,126,201                 ; vmovq         %xmm1,%r9
@@ -7148,36 +7097,36 @@
   DB  73,193,234,32                       ; shr           $0x20,%r10
   DB  73,193,233,32                       ; shr           $0x20,%r9
   DB  196,227,125,25,201,1                ; vextractf128  $0x1,%ymm1,%xmm1
-  DB  196,225,249,126,205                 ; vmovq         %xmm1,%rbp
-  DB  65,137,239                          ; mov           %ebp,%r15d
-  DB  196,227,249,22,203,1                ; vpextrq       $0x1,%xmm1,%rbx
-  DB  65,137,220                          ; mov           %ebx,%r12d
-  DB  72,193,235,32                       ; shr           $0x20,%rbx
+  DB  196,225,249,126,203                 ; vmovq         %xmm1,%rbx
+  DB  65,137,223                          ; mov           %ebx,%r15d
+  DB  196,227,249,22,205,1                ; vpextrq       $0x1,%xmm1,%rbp
+  DB  137,233                             ; mov           %ebp,%ecx
   DB  72,193,237,32                       ; shr           $0x20,%rbp
-  DB  196,129,122,16,12,184               ; vmovss        (%r8,%r15,4),%xmm1
-  DB  196,195,113,33,12,168,16            ; vinsertps     $0x10,(%r8,%rbp,4),%xmm1,%xmm1
-  DB  196,129,122,16,20,160               ; vmovss        (%r8,%r12,4),%xmm2
+  DB  72,193,235,32                       ; shr           $0x20,%rbx
+  DB  196,129,122,16,12,188               ; vmovss        (%r12,%r15,4),%xmm1
+  DB  196,195,113,33,12,156,16            ; vinsertps     $0x10,(%r12,%rbx,4),%xmm1,%xmm1
+  DB  196,193,122,16,20,140               ; vmovss        (%r12,%rcx,4),%xmm2
   DB  196,227,113,33,202,32               ; vinsertps     $0x20,%xmm2,%xmm1,%xmm1
-  DB  196,193,122,16,20,152               ; vmovss        (%r8,%rbx,4),%xmm2
+  DB  196,193,122,16,20,172               ; vmovss        (%r12,%rbp,4),%xmm2
   DB  196,227,113,33,202,48               ; vinsertps     $0x30,%xmm2,%xmm1,%xmm1
-  DB  196,129,122,16,20,152               ; vmovss        (%r8,%r11,4),%xmm2
-  DB  196,131,105,33,20,136,16            ; vinsertps     $0x10,(%r8,%r9,4),%xmm2,%xmm2
-  DB  196,129,122,16,28,176               ; vmovss        (%r8,%r14,4),%xmm3
+  DB  196,129,122,16,20,156               ; vmovss        (%r12,%r11,4),%xmm2
+  DB  196,131,105,33,20,140,16            ; vinsertps     $0x10,(%r12,%r9,4),%xmm2,%xmm2
+  DB  196,129,122,16,28,180               ; vmovss        (%r12,%r14,4),%xmm3
   DB  196,227,105,33,211,32               ; vinsertps     $0x20,%xmm3,%xmm2,%xmm2
-  DB  196,129,122,16,28,144               ; vmovss        (%r8,%r10,4),%xmm3
+  DB  196,129,122,16,28,148               ; vmovss        (%r12,%r10,4),%xmm3
   DB  196,227,105,33,211,48               ; vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   DB  196,227,109,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm2,%ymm1
   DB  72,139,64,24                        ; mov           0x18(%rax),%rax
-  DB  196,193,105,114,208,16              ; vpsrld        $0x10,%xmm8,%xmm2
-  DB  196,193,97,114,210,16               ; vpsrld        $0x10,%xmm10,%xmm3
+  DB  196,193,105,114,210,16              ; vpsrld        $0x10,%xmm10,%xmm2
+  DB  196,193,97,114,208,16               ; vpsrld        $0x10,%xmm8,%xmm3
   DB  196,227,109,24,211,1                ; vinsertf128   $0x1,%xmm3,%ymm2,%ymm2
   DB  196,193,108,84,209                  ; vandps        %ymm9,%ymm2,%ymm2
-  DB  196,193,249,126,208                 ; vmovq         %xmm2,%r8
-  DB  69,137,194                          ; mov           %r8d,%r10d
-  DB  196,195,249,22,209,1                ; vpextrq       $0x1,%xmm2,%r9
-  DB  69,137,203                          ; mov           %r9d,%r11d
+  DB  196,193,249,126,209                 ; vmovq         %xmm2,%r9
+  DB  69,137,202                          ; mov           %r9d,%r10d
+  DB  196,227,249,22,209,1                ; vpextrq       $0x1,%xmm2,%rcx
+  DB  65,137,203                          ; mov           %ecx,%r11d
+  DB  72,193,233,32                       ; shr           $0x20,%rcx
   DB  73,193,233,32                       ; shr           $0x20,%r9
-  DB  73,193,232,32                       ; shr           $0x20,%r8
   DB  196,227,125,25,210,1                ; vextractf128  $0x1,%ymm2,%xmm2
   DB  196,225,249,126,213                 ; vmovq         %xmm2,%rbp
   DB  65,137,238                          ; mov           %ebp,%r14d
@@ -7192,19 +7141,21 @@
   DB  197,250,16,28,152                   ; vmovss        (%rax,%rbx,4),%xmm3
   DB  196,99,105,33,203,48                ; vinsertps     $0x30,%xmm3,%xmm2,%xmm9
   DB  196,161,122,16,28,144               ; vmovss        (%rax,%r10,4),%xmm3
-  DB  196,163,97,33,28,128,16             ; vinsertps     $0x10,(%rax,%r8,4),%xmm3,%xmm3
+  DB  196,163,97,33,28,136,16             ; vinsertps     $0x10,(%rax,%r9,4),%xmm3,%xmm3
   DB  196,161,122,16,20,152               ; vmovss        (%rax,%r11,4),%xmm2
   DB  196,227,97,33,210,32                ; vinsertps     $0x20,%xmm2,%xmm3,%xmm2
-  DB  196,161,122,16,28,136               ; vmovss        (%rax,%r9,4),%xmm3
+  DB  197,250,16,28,136                   ; vmovss        (%rax,%rcx,4),%xmm3
   DB  196,227,105,33,211,48               ; vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   DB  196,195,109,24,209,1                ; vinsertf128   $0x1,%xmm9,%ymm2,%ymm2
-  DB  196,193,57,114,208,24               ; vpsrld        $0x18,%xmm8,%xmm8
-  DB  196,193,97,114,210,24               ; vpsrld        $0x18,%xmm10,%xmm3
-  DB  196,227,61,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
+  DB  196,193,49,114,210,24               ; vpsrld        $0x18,%xmm10,%xmm9
+  DB  196,193,97,114,208,24               ; vpsrld        $0x18,%xmm8,%xmm3
+  DB  196,227,53,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm9,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,143,71,0,0          ; vbroadcastss  0x478f(%rip),%ymm8        # 6980 <_sk_callback_avx+0x258>
+  DB  196,98,125,24,5,114,70,0,0          ; vbroadcastss  0x4672(%rip),%ymm8        # 67b8 <_sk_callback_avx+0x258>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  76,137,193                          ; mov           %r8,%rcx
+  DB  72,131,196,8                        ; add           $0x8,%rsp
   DB  91                                  ; pop           %rbx
   DB  65,92                               ; pop           %r12
   DB  65,93                               ; pop           %r13
@@ -7212,57 +7163,20 @@
   DB  65,95                               ; pop           %r15
   DB  93                                  ; pop           %rbp
   DB  255,224                             ; jmpq          *%rax
-  DB  65,137,201                          ; mov           %ecx,%r9d
-  DB  65,128,225,7                        ; and           $0x7,%r9b
-  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
-  DB  65,254,201                          ; dec           %r9b
-  DB  65,128,249,6                        ; cmp           $0x6,%r9b
-  DB  15,135,211,253,255,255              ; ja            1ff0 <_sk_load_tables_avx+0x14>
-  DB  69,15,182,201                       ; movzbl        %r9b,%r9d
-  DB  76,141,21,140,0,0,0                 ; lea           0x8c(%rip),%r10        # 22b4 <_sk_load_tables_avx+0x2d8>
-  DB  79,99,12,138                        ; movslq        (%r10,%r9,4),%r9
-  DB  77,1,209                            ; add           %r10,%r9
-  DB  65,255,225                          ; jmpq          *%r9
-  DB  196,193,121,110,68,184,24           ; vmovd         0x18(%r8,%rdi,4),%xmm0
-  DB  197,249,112,192,68                  ; vpshufd       $0x44,%xmm0,%xmm0
-  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
-  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
-  DB  196,99,117,12,192,64                ; vblendps      $0x40,%ymm0,%ymm1,%ymm8
-  DB  196,99,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm0
-  DB  196,195,121,34,68,184,20,1          ; vpinsrd       $0x1,0x14(%r8,%rdi,4),%xmm0,%xmm0
-  DB  196,99,61,24,192,1                  ; vinsertf128   $0x1,%xmm0,%ymm8,%ymm8
-  DB  196,99,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm0
-  DB  196,195,121,34,68,184,16,0          ; vpinsrd       $0x0,0x10(%r8,%rdi,4),%xmm0,%xmm0
-  DB  196,99,61,24,192,1                  ; vinsertf128   $0x1,%xmm0,%ymm8,%ymm8
-  DB  196,195,57,34,68,184,12,3           ; vpinsrd       $0x3,0xc(%r8,%rdi,4),%xmm8,%xmm0
-  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
-  DB  196,195,57,34,68,184,8,2            ; vpinsrd       $0x2,0x8(%r8,%rdi,4),%xmm8,%xmm0
-  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
-  DB  196,195,57,34,68,184,4,1            ; vpinsrd       $0x1,0x4(%r8,%rdi,4),%xmm8,%xmm0
-  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
-  DB  196,195,57,34,4,184,0               ; vpinsrd       $0x0,(%r8,%rdi,4),%xmm8,%xmm0
-  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
-  DB  233,62,253,255,255                  ; jmpq          1ff0 <_sk_load_tables_avx+0x14>
-  DB  102,144                             ; xchg          %ax,%ax
-  DB  236                                 ; in            (%dx),%al
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  222,255                             ; fdivrp        %st,%st(7)
-  DB  255                                 ; (bad)
-  DB  255,208                             ; callq         *%rax
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,194                             ; inc           %edx
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,174,255,255,255,154             ; ljmp          *-0x65000001(%rsi)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  126,255                             ; jle           22cd <_sk_load_tables_avx+0x2f1>
-  DB  255                                 ; (bad)
-  DB  255                                 ; .byte         0xff
+  DB  185,8,0,0,0                         ; mov           $0x8,%ecx
+  DB  68,41,193                           ; sub           %r8d,%ecx
+  DB  192,225,3                           ; shl           $0x3,%cl
+  DB  73,199,194,255,255,255,255          ; mov           $0xffffffffffffffff,%r10
+  DB  73,211,234                          ; shr           %cl,%r10
+  DB  196,193,249,110,194                 ; vmovq         %r10,%xmm0
+  DB  196,226,121,48,192                  ; vpmovzxbw     %xmm0,%xmm0
+  DB  196,226,121,0,13,72,73,0,0          ; vpshufb       0x4948(%rip),%xmm0,%xmm1        # 6ad0 <_sk_callback_avx+0x570>
+  DB  196,226,121,33,201                  ; vpmovsxbd     %xmm1,%xmm1
+  DB  196,226,121,0,5,74,73,0,0           ; vpshufb       0x494a(%rip),%xmm0,%xmm0        # 6ae0 <_sk_callback_avx+0x580>
+  DB  196,226,121,33,192                  ; vpmovsxbd     %xmm0,%xmm0
+  DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
+  DB  196,66,125,44,17                    ; vmaskmovps    (%r9),%ymm0,%ymm10
+  DB  233,155,253,255,255                 ; jmpq          1f46 <_sk_load_tables_avx+0x1e>
 
 PUBLIC _sk_load_tables_u16_be_avx
 _sk_load_tables_u16_be_avx LABEL PROC
@@ -7270,7 +7184,7 @@
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,113,2,0,0                    ; jne           2557 <_sk_load_tables_u16_be_avx+0x287>
+  DB  15,133,113,2,0,0                    ; jne           2432 <_sk_load_tables_u16_be_avx+0x287>
   DB  196,1,121,16,4,72                   ; vmovupd       (%r8,%r9,2),%xmm8
   DB  196,129,121,16,84,72,16             ; vmovupd       0x10(%r8,%r9,2),%xmm2
   DB  196,129,121,16,92,72,32             ; vmovupd       0x20(%r8,%r9,2),%xmm3
@@ -7292,7 +7206,7 @@
   DB  197,177,108,208                     ; vpunpcklqdq   %xmm0,%xmm9,%xmm2
   DB  197,177,109,200                     ; vpunpckhqdq   %xmm0,%xmm9,%xmm1
   DB  196,65,57,108,212                   ; vpunpcklqdq   %xmm12,%xmm8,%xmm10
-  DB  197,121,111,29,222,73,0,0           ; vmovdqa       0x49de(%rip),%xmm11        # 6d20 <_sk_callback_avx+0x5f8>
+  DB  197,121,111,29,211,72,0,0           ; vmovdqa       0x48d3(%rip),%xmm11        # 6af0 <_sk_callback_avx+0x590>
   DB  196,193,105,219,195                 ; vpand         %xmm11,%xmm2,%xmm0
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  196,193,121,105,209                 ; vpunpckhwd    %xmm9,%xmm0,%xmm2
@@ -7391,7 +7305,7 @@
   DB  196,226,121,51,219                  ; vpmovzxwd     %xmm3,%xmm3
   DB  196,195,101,24,216,1                ; vinsertf128   $0x1,%xmm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,64,68,0,0           ; vbroadcastss  0x4440(%rip),%ymm8        # 6984 <_sk_callback_avx+0x25c>
+  DB  196,98,125,24,5,157,67,0,0          ; vbroadcastss  0x439d(%rip),%ymm8        # 67bc <_sk_callback_avx+0x25c>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  91                                  ; pop           %rbx
@@ -7404,29 +7318,29 @@
   DB  196,1,123,16,4,72                   ; vmovsd        (%r8,%r9,2),%xmm8
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,85                              ; je            25bd <_sk_load_tables_u16_be_avx+0x2ed>
+  DB  116,85                              ; je            2498 <_sk_load_tables_u16_be_avx+0x2ed>
   DB  196,1,57,22,68,72,8                 ; vmovhpd       0x8(%r8,%r9,2),%xmm8,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,72                              ; jb            25bd <_sk_load_tables_u16_be_avx+0x2ed>
+  DB  114,72                              ; jb            2498 <_sk_load_tables_u16_be_avx+0x2ed>
   DB  196,129,123,16,84,72,16             ; vmovsd        0x10(%r8,%r9,2),%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  116,72                              ; je            25ca <_sk_load_tables_u16_be_avx+0x2fa>
+  DB  116,72                              ; je            24a5 <_sk_load_tables_u16_be_avx+0x2fa>
   DB  196,129,105,22,84,72,24             ; vmovhpd       0x18(%r8,%r9,2),%xmm2,%xmm2
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,59                              ; jb            25ca <_sk_load_tables_u16_be_avx+0x2fa>
+  DB  114,59                              ; jb            24a5 <_sk_load_tables_u16_be_avx+0x2fa>
   DB  196,129,123,16,92,72,32             ; vmovsd        0x20(%r8,%r9,2),%xmm3
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  15,132,97,253,255,255               ; je            2301 <_sk_load_tables_u16_be_avx+0x31>
+  DB  15,132,97,253,255,255               ; je            21dc <_sk_load_tables_u16_be_avx+0x31>
   DB  196,129,97,22,92,72,40              ; vmovhpd       0x28(%r8,%r9,2),%xmm3,%xmm3
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  15,130,80,253,255,255               ; jb            2301 <_sk_load_tables_u16_be_avx+0x31>
+  DB  15,130,80,253,255,255               ; jb            21dc <_sk_load_tables_u16_be_avx+0x31>
   DB  196,1,122,126,76,72,48              ; vmovq         0x30(%r8,%r9,2),%xmm9
-  DB  233,68,253,255,255                  ; jmpq          2301 <_sk_load_tables_u16_be_avx+0x31>
+  DB  233,68,253,255,255                  ; jmpq          21dc <_sk_load_tables_u16_be_avx+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
   DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
-  DB  233,55,253,255,255                  ; jmpq          2301 <_sk_load_tables_u16_be_avx+0x31>
+  DB  233,55,253,255,255                  ; jmpq          21dc <_sk_load_tables_u16_be_avx+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
-  DB  233,46,253,255,255                  ; jmpq          2301 <_sk_load_tables_u16_be_avx+0x31>
+  DB  233,46,253,255,255                  ; jmpq          21dc <_sk_load_tables_u16_be_avx+0x31>
 
 PUBLIC _sk_load_tables_rgb_u16_be_avx
 _sk_load_tables_rgb_u16_be_avx LABEL PROC
@@ -7434,7 +7348,7 @@
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,141,12,127                       ; lea           (%rdi,%rdi,2),%r9
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,93,2,0,0                     ; jne           2842 <_sk_load_tables_rgb_u16_be_avx+0x26f>
+  DB  15,133,93,2,0,0                     ; jne           271d <_sk_load_tables_rgb_u16_be_avx+0x26f>
   DB  196,129,122,111,4,72                ; vmovdqu       (%r8,%r9,2),%xmm0
   DB  196,129,122,111,84,72,12            ; vmovdqu       0xc(%r8,%r9,2),%xmm2
   DB  196,129,122,111,76,72,24            ; vmovdqu       0x18(%r8,%r9,2),%xmm1
@@ -7461,7 +7375,7 @@
   DB  197,185,108,202                     ; vpunpcklqdq   %xmm2,%xmm8,%xmm1
   DB  197,185,109,210                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm2
   DB  197,121,108,195                     ; vpunpcklqdq   %xmm3,%xmm0,%xmm8
-  DB  197,121,111,13,215,70,0,0           ; vmovdqa       0x46d7(%rip),%xmm9        # 6d30 <_sk_callback_avx+0x608>
+  DB  197,121,111,13,204,69,0,0           ; vmovdqa       0x45cc(%rip),%xmm9        # 6b00 <_sk_callback_avx+0x5a0>
   DB  196,193,113,219,193                 ; vpand         %xmm9,%xmm1,%xmm0
   DB  196,65,41,239,210                   ; vpxor         %xmm10,%xmm10,%xmm10
   DB  196,193,121,105,202                 ; vpunpckhwd    %xmm10,%xmm0,%xmm1
@@ -7553,7 +7467,7 @@
   DB  196,227,105,33,211,48               ; vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   DB  196,195,109,24,208,1                ; vinsertf128   $0x1,%xmm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,82,65,0,0         ; vbroadcastss  0x4152(%rip),%ymm3        # 6988 <_sk_callback_avx+0x260>
+  DB  196,226,125,24,29,175,64,0,0        ; vbroadcastss  0x40af(%rip),%ymm3        # 67c0 <_sk_callback_avx+0x260>
   DB  91                                  ; pop           %rbx
   DB  65,92                               ; pop           %r12
   DB  65,93                               ; pop           %r13
@@ -7564,36 +7478,36 @@
   DB  196,129,121,110,4,72                ; vmovd         (%r8,%r9,2),%xmm0
   DB  196,129,121,196,68,72,4,2           ; vpinsrw       $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  117,5                               ; jne           285b <_sk_load_tables_rgb_u16_be_avx+0x288>
-  DB  233,190,253,255,255                 ; jmpq          2619 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  117,5                               ; jne           2736 <_sk_load_tables_rgb_u16_be_avx+0x288>
+  DB  233,190,253,255,255                 ; jmpq          24f4 <_sk_load_tables_rgb_u16_be_avx+0x46>
   DB  196,129,121,110,76,72,6             ; vmovd         0x6(%r8,%r9,2),%xmm1
   DB  196,1,113,196,68,72,10,2            ; vpinsrw       $0x2,0xa(%r8,%r9,2),%xmm1,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,26                              ; jb            288a <_sk_load_tables_rgb_u16_be_avx+0x2b7>
+  DB  114,26                              ; jb            2765 <_sk_load_tables_rgb_u16_be_avx+0x2b7>
   DB  196,129,121,110,76,72,12            ; vmovd         0xc(%r8,%r9,2),%xmm1
   DB  196,129,113,196,84,72,16,2          ; vpinsrw       $0x2,0x10(%r8,%r9,2),%xmm1,%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  117,10                              ; jne           288f <_sk_load_tables_rgb_u16_be_avx+0x2bc>
-  DB  233,143,253,255,255                 ; jmpq          2619 <_sk_load_tables_rgb_u16_be_avx+0x46>
-  DB  233,138,253,255,255                 ; jmpq          2619 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  117,10                              ; jne           276a <_sk_load_tables_rgb_u16_be_avx+0x2bc>
+  DB  233,143,253,255,255                 ; jmpq          24f4 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  233,138,253,255,255                 ; jmpq          24f4 <_sk_load_tables_rgb_u16_be_avx+0x46>
   DB  196,129,121,110,76,72,18            ; vmovd         0x12(%r8,%r9,2),%xmm1
   DB  196,1,113,196,76,72,22,2            ; vpinsrw       $0x2,0x16(%r8,%r9,2),%xmm1,%xmm9
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,26                              ; jb            28be <_sk_load_tables_rgb_u16_be_avx+0x2eb>
+  DB  114,26                              ; jb            2799 <_sk_load_tables_rgb_u16_be_avx+0x2eb>
   DB  196,129,121,110,76,72,24            ; vmovd         0x18(%r8,%r9,2),%xmm1
   DB  196,129,113,196,76,72,28,2          ; vpinsrw       $0x2,0x1c(%r8,%r9,2),%xmm1,%xmm1
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  117,10                              ; jne           28c3 <_sk_load_tables_rgb_u16_be_avx+0x2f0>
-  DB  233,91,253,255,255                  ; jmpq          2619 <_sk_load_tables_rgb_u16_be_avx+0x46>
-  DB  233,86,253,255,255                  ; jmpq          2619 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  117,10                              ; jne           279e <_sk_load_tables_rgb_u16_be_avx+0x2f0>
+  DB  233,91,253,255,255                  ; jmpq          24f4 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  233,86,253,255,255                  ; jmpq          24f4 <_sk_load_tables_rgb_u16_be_avx+0x46>
   DB  196,129,121,110,92,72,30            ; vmovd         0x1e(%r8,%r9,2),%xmm3
   DB  196,1,97,196,92,72,34,2             ; vpinsrw       $0x2,0x22(%r8,%r9,2),%xmm3,%xmm11
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,20                              ; jb            28ec <_sk_load_tables_rgb_u16_be_avx+0x319>
+  DB  114,20                              ; jb            27c7 <_sk_load_tables_rgb_u16_be_avx+0x319>
   DB  196,129,121,110,92,72,36            ; vmovd         0x24(%r8,%r9,2),%xmm3
   DB  196,129,97,196,92,72,40,2           ; vpinsrw       $0x2,0x28(%r8,%r9,2),%xmm3,%xmm3
-  DB  233,45,253,255,255                  ; jmpq          2619 <_sk_load_tables_rgb_u16_be_avx+0x46>
-  DB  233,40,253,255,255                  ; jmpq          2619 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  233,45,253,255,255                  ; jmpq          24f4 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  233,40,253,255,255                  ; jmpq          24f4 <_sk_load_tables_rgb_u16_be_avx+0x46>
 
 PUBLIC _sk_byte_tables_avx
 _sk_byte_tables_avx LABEL PROC
@@ -7604,7 +7518,7 @@
   DB  65,84                               ; push          %r12
   DB  83                                  ; push          %rbx
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,134,64,0,0          ; vbroadcastss  0x4086(%rip),%ymm8        # 698c <_sk_callback_avx+0x264>
+  DB  196,98,125,24,5,227,63,0,0          ; vbroadcastss  0x3fe3(%rip),%ymm8        # 67c4 <_sk_callback_avx+0x264>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
   DB  197,253,91,192                      ; vcvtps2dq     %ymm0,%ymm0
   DB  196,195,249,22,192,1                ; vpextrq       $0x1,%xmm0,%r8
@@ -7641,7 +7555,7 @@
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,53,24,192,1                 ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,212,63,0,0         ; vbroadcastss  0x3fd4(%rip),%ymm9        # 6990 <_sk_callback_avx+0x268>
+  DB  196,98,125,24,13,49,63,0,0          ; vbroadcastss  0x3f31(%rip),%ymm9        # 67c8 <_sk_callback_avx+0x268>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
@@ -7801,7 +7715,7 @@
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,53,24,192,1                 ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,250,60,0,0         ; vbroadcastss  0x3cfa(%rip),%ymm9        # 6994 <_sk_callback_avx+0x26c>
+  DB  196,98,125,24,13,87,60,0,0          ; vbroadcastss  0x3c57(%rip),%ymm9        # 67cc <_sk_callback_avx+0x26c>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
@@ -8088,36 +8002,36 @@
   DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,216                      ; vcvtdq2ps     %ymm0,%ymm11
-  DB  196,98,125,24,37,88,56,0,0          ; vbroadcastss  0x3858(%rip),%ymm12        # 6998 <_sk_callback_avx+0x270>
+  DB  196,98,125,24,37,181,55,0,0         ; vbroadcastss  0x37b5(%rip),%ymm12        # 67d0 <_sk_callback_avx+0x270>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,78,56,0,0          ; vbroadcastss  0x384e(%rip),%ymm12        # 699c <_sk_callback_avx+0x274>
+  DB  196,98,125,24,37,171,55,0,0         ; vbroadcastss  0x37ab(%rip),%ymm12        # 67d4 <_sk_callback_avx+0x274>
   DB  196,193,124,84,196                  ; vandps        %ymm12,%ymm0,%ymm0
-  DB  196,98,125,24,37,68,56,0,0          ; vbroadcastss  0x3844(%rip),%ymm12        # 69a0 <_sk_callback_avx+0x278>
+  DB  196,98,125,24,37,161,55,0,0         ; vbroadcastss  0x37a1(%rip),%ymm12        # 67d8 <_sk_callback_avx+0x278>
   DB  196,193,124,86,196                  ; vorps         %ymm12,%ymm0,%ymm0
-  DB  196,98,125,24,37,58,56,0,0          ; vbroadcastss  0x383a(%rip),%ymm12        # 69a4 <_sk_callback_avx+0x27c>
+  DB  196,98,125,24,37,151,55,0,0         ; vbroadcastss  0x3797(%rip),%ymm12        # 67dc <_sk_callback_avx+0x27c>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,48,56,0,0          ; vbroadcastss  0x3830(%rip),%ymm12        # 69a8 <_sk_callback_avx+0x280>
+  DB  196,98,125,24,37,141,55,0,0         ; vbroadcastss  0x378d(%rip),%ymm12        # 67e0 <_sk_callback_avx+0x280>
   DB  196,65,124,89,228                   ; vmulps        %ymm12,%ymm0,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,33,56,0,0          ; vbroadcastss  0x3821(%rip),%ymm12        # 69ac <_sk_callback_avx+0x284>
+  DB  196,98,125,24,37,126,55,0,0         ; vbroadcastss  0x377e(%rip),%ymm12        # 67e4 <_sk_callback_avx+0x284>
   DB  196,193,124,88,196                  ; vaddps        %ymm12,%ymm0,%ymm0
-  DB  196,98,125,24,37,23,56,0,0          ; vbroadcastss  0x3817(%rip),%ymm12        # 69b0 <_sk_callback_avx+0x288>
+  DB  196,98,125,24,37,116,55,0,0         ; vbroadcastss  0x3774(%rip),%ymm12        # 67e8 <_sk_callback_avx+0x288>
   DB  197,156,94,192                      ; vdivps        %ymm0,%ymm12,%ymm0
   DB  197,164,92,192                      ; vsubps        %ymm0,%ymm11,%ymm0
   DB  197,172,89,192                      ; vmulps        %ymm0,%ymm10,%ymm0
   DB  196,99,125,8,208,1                  ; vroundps      $0x1,%ymm0,%ymm10
   DB  196,65,124,92,210                   ; vsubps        %ymm10,%ymm0,%ymm10
-  DB  196,98,125,24,29,251,55,0,0         ; vbroadcastss  0x37fb(%rip),%ymm11        # 69b4 <_sk_callback_avx+0x28c>
+  DB  196,98,125,24,29,88,55,0,0          ; vbroadcastss  0x3758(%rip),%ymm11        # 67ec <_sk_callback_avx+0x28c>
   DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
-  DB  196,98,125,24,29,241,55,0,0         ; vbroadcastss  0x37f1(%rip),%ymm11        # 69b8 <_sk_callback_avx+0x290>
+  DB  196,98,125,24,29,78,55,0,0          ; vbroadcastss  0x374e(%rip),%ymm11        # 67f0 <_sk_callback_avx+0x290>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,124,92,195                  ; vsubps        %ymm11,%ymm0,%ymm0
-  DB  196,98,125,24,29,226,55,0,0         ; vbroadcastss  0x37e2(%rip),%ymm11        # 69bc <_sk_callback_avx+0x294>
+  DB  196,98,125,24,29,63,55,0,0          ; vbroadcastss  0x373f(%rip),%ymm11        # 67f4 <_sk_callback_avx+0x294>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,216,55,0,0         ; vbroadcastss  0x37d8(%rip),%ymm11        # 69c0 <_sk_callback_avx+0x298>
+  DB  196,98,125,24,29,53,55,0,0          ; vbroadcastss  0x3735(%rip),%ymm11        # 67f8 <_sk_callback_avx+0x298>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,124,88,194                  ; vaddps        %ymm10,%ymm0,%ymm0
-  DB  196,98,125,24,21,201,55,0,0         ; vbroadcastss  0x37c9(%rip),%ymm10        # 69c4 <_sk_callback_avx+0x29c>
+  DB  196,98,125,24,21,38,55,0,0          ; vbroadcastss  0x3726(%rip),%ymm10        # 67fc <_sk_callback_avx+0x29c>
   DB  196,193,124,89,194                  ; vmulps        %ymm10,%ymm0,%ymm0
   DB  197,253,91,192                      ; vcvtps2dq     %ymm0,%ymm0
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -8125,7 +8039,7 @@
   DB  196,195,125,74,193,128              ; vblendvps     %ymm8,%ymm9,%ymm0,%ymm0
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,160,55,0,0          ; vbroadcastss  0x37a0(%rip),%ymm8        # 69c8 <_sk_callback_avx+0x2a0>
+  DB  196,98,125,24,5,253,54,0,0          ; vbroadcastss  0x36fd(%rip),%ymm8        # 6800 <_sk_callback_avx+0x2a0>
   DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8145,36 +8059,36 @@
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,217                      ; vcvtdq2ps     %ymm1,%ymm11
-  DB  196,98,125,24,37,81,55,0,0          ; vbroadcastss  0x3751(%rip),%ymm12        # 69cc <_sk_callback_avx+0x2a4>
+  DB  196,98,125,24,37,174,54,0,0         ; vbroadcastss  0x36ae(%rip),%ymm12        # 6804 <_sk_callback_avx+0x2a4>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,71,55,0,0          ; vbroadcastss  0x3747(%rip),%ymm12        # 69d0 <_sk_callback_avx+0x2a8>
+  DB  196,98,125,24,37,164,54,0,0         ; vbroadcastss  0x36a4(%rip),%ymm12        # 6808 <_sk_callback_avx+0x2a8>
   DB  196,193,116,84,204                  ; vandps        %ymm12,%ymm1,%ymm1
-  DB  196,98,125,24,37,61,55,0,0          ; vbroadcastss  0x373d(%rip),%ymm12        # 69d4 <_sk_callback_avx+0x2ac>
+  DB  196,98,125,24,37,154,54,0,0         ; vbroadcastss  0x369a(%rip),%ymm12        # 680c <_sk_callback_avx+0x2ac>
   DB  196,193,116,86,204                  ; vorps         %ymm12,%ymm1,%ymm1
-  DB  196,98,125,24,37,51,55,0,0          ; vbroadcastss  0x3733(%rip),%ymm12        # 69d8 <_sk_callback_avx+0x2b0>
+  DB  196,98,125,24,37,144,54,0,0         ; vbroadcastss  0x3690(%rip),%ymm12        # 6810 <_sk_callback_avx+0x2b0>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,41,55,0,0          ; vbroadcastss  0x3729(%rip),%ymm12        # 69dc <_sk_callback_avx+0x2b4>
+  DB  196,98,125,24,37,134,54,0,0         ; vbroadcastss  0x3686(%rip),%ymm12        # 6814 <_sk_callback_avx+0x2b4>
   DB  196,65,116,89,228                   ; vmulps        %ymm12,%ymm1,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,26,55,0,0          ; vbroadcastss  0x371a(%rip),%ymm12        # 69e0 <_sk_callback_avx+0x2b8>
+  DB  196,98,125,24,37,119,54,0,0         ; vbroadcastss  0x3677(%rip),%ymm12        # 6818 <_sk_callback_avx+0x2b8>
   DB  196,193,116,88,204                  ; vaddps        %ymm12,%ymm1,%ymm1
-  DB  196,98,125,24,37,16,55,0,0          ; vbroadcastss  0x3710(%rip),%ymm12        # 69e4 <_sk_callback_avx+0x2bc>
+  DB  196,98,125,24,37,109,54,0,0         ; vbroadcastss  0x366d(%rip),%ymm12        # 681c <_sk_callback_avx+0x2bc>
   DB  197,156,94,201                      ; vdivps        %ymm1,%ymm12,%ymm1
   DB  197,164,92,201                      ; vsubps        %ymm1,%ymm11,%ymm1
   DB  197,172,89,201                      ; vmulps        %ymm1,%ymm10,%ymm1
   DB  196,99,125,8,209,1                  ; vroundps      $0x1,%ymm1,%ymm10
   DB  196,65,116,92,210                   ; vsubps        %ymm10,%ymm1,%ymm10
-  DB  196,98,125,24,29,244,54,0,0         ; vbroadcastss  0x36f4(%rip),%ymm11        # 69e8 <_sk_callback_avx+0x2c0>
+  DB  196,98,125,24,29,81,54,0,0          ; vbroadcastss  0x3651(%rip),%ymm11        # 6820 <_sk_callback_avx+0x2c0>
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,234,54,0,0         ; vbroadcastss  0x36ea(%rip),%ymm11        # 69ec <_sk_callback_avx+0x2c4>
+  DB  196,98,125,24,29,71,54,0,0          ; vbroadcastss  0x3647(%rip),%ymm11        # 6824 <_sk_callback_avx+0x2c4>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,116,92,203                  ; vsubps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,219,54,0,0         ; vbroadcastss  0x36db(%rip),%ymm11        # 69f0 <_sk_callback_avx+0x2c8>
+  DB  196,98,125,24,29,56,54,0,0          ; vbroadcastss  0x3638(%rip),%ymm11        # 6828 <_sk_callback_avx+0x2c8>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,209,54,0,0         ; vbroadcastss  0x36d1(%rip),%ymm11        # 69f4 <_sk_callback_avx+0x2cc>
+  DB  196,98,125,24,29,46,54,0,0          ; vbroadcastss  0x362e(%rip),%ymm11        # 682c <_sk_callback_avx+0x2cc>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,116,88,202                  ; vaddps        %ymm10,%ymm1,%ymm1
-  DB  196,98,125,24,21,194,54,0,0         ; vbroadcastss  0x36c2(%rip),%ymm10        # 69f8 <_sk_callback_avx+0x2d0>
+  DB  196,98,125,24,21,31,54,0,0          ; vbroadcastss  0x361f(%rip),%ymm10        # 6830 <_sk_callback_avx+0x2d0>
   DB  196,193,116,89,202                  ; vmulps        %ymm10,%ymm1,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -8182,7 +8096,7 @@
   DB  196,195,117,74,201,128              ; vblendvps     %ymm8,%ymm9,%ymm1,%ymm1
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
-  DB  196,98,125,24,5,153,54,0,0          ; vbroadcastss  0x3699(%rip),%ymm8        # 69fc <_sk_callback_avx+0x2d4>
+  DB  196,98,125,24,5,246,53,0,0          ; vbroadcastss  0x35f6(%rip),%ymm8        # 6834 <_sk_callback_avx+0x2d4>
   DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8202,36 +8116,36 @@
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,218                      ; vcvtdq2ps     %ymm2,%ymm11
-  DB  196,98,125,24,37,74,54,0,0          ; vbroadcastss  0x364a(%rip),%ymm12        # 6a00 <_sk_callback_avx+0x2d8>
+  DB  196,98,125,24,37,167,53,0,0         ; vbroadcastss  0x35a7(%rip),%ymm12        # 6838 <_sk_callback_avx+0x2d8>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,64,54,0,0          ; vbroadcastss  0x3640(%rip),%ymm12        # 6a04 <_sk_callback_avx+0x2dc>
+  DB  196,98,125,24,37,157,53,0,0         ; vbroadcastss  0x359d(%rip),%ymm12        # 683c <_sk_callback_avx+0x2dc>
   DB  196,193,108,84,212                  ; vandps        %ymm12,%ymm2,%ymm2
-  DB  196,98,125,24,37,54,54,0,0          ; vbroadcastss  0x3636(%rip),%ymm12        # 6a08 <_sk_callback_avx+0x2e0>
+  DB  196,98,125,24,37,147,53,0,0         ; vbroadcastss  0x3593(%rip),%ymm12        # 6840 <_sk_callback_avx+0x2e0>
   DB  196,193,108,86,212                  ; vorps         %ymm12,%ymm2,%ymm2
-  DB  196,98,125,24,37,44,54,0,0          ; vbroadcastss  0x362c(%rip),%ymm12        # 6a0c <_sk_callback_avx+0x2e4>
+  DB  196,98,125,24,37,137,53,0,0         ; vbroadcastss  0x3589(%rip),%ymm12        # 6844 <_sk_callback_avx+0x2e4>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,34,54,0,0          ; vbroadcastss  0x3622(%rip),%ymm12        # 6a10 <_sk_callback_avx+0x2e8>
+  DB  196,98,125,24,37,127,53,0,0         ; vbroadcastss  0x357f(%rip),%ymm12        # 6848 <_sk_callback_avx+0x2e8>
   DB  196,65,108,89,228                   ; vmulps        %ymm12,%ymm2,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,19,54,0,0          ; vbroadcastss  0x3613(%rip),%ymm12        # 6a14 <_sk_callback_avx+0x2ec>
+  DB  196,98,125,24,37,112,53,0,0         ; vbroadcastss  0x3570(%rip),%ymm12        # 684c <_sk_callback_avx+0x2ec>
   DB  196,193,108,88,212                  ; vaddps        %ymm12,%ymm2,%ymm2
-  DB  196,98,125,24,37,9,54,0,0           ; vbroadcastss  0x3609(%rip),%ymm12        # 6a18 <_sk_callback_avx+0x2f0>
+  DB  196,98,125,24,37,102,53,0,0         ; vbroadcastss  0x3566(%rip),%ymm12        # 6850 <_sk_callback_avx+0x2f0>
   DB  197,156,94,210                      ; vdivps        %ymm2,%ymm12,%ymm2
   DB  197,164,92,210                      ; vsubps        %ymm2,%ymm11,%ymm2
   DB  197,172,89,210                      ; vmulps        %ymm2,%ymm10,%ymm2
   DB  196,99,125,8,210,1                  ; vroundps      $0x1,%ymm2,%ymm10
   DB  196,65,108,92,210                   ; vsubps        %ymm10,%ymm2,%ymm10
-  DB  196,98,125,24,29,237,53,0,0         ; vbroadcastss  0x35ed(%rip),%ymm11        # 6a1c <_sk_callback_avx+0x2f4>
+  DB  196,98,125,24,29,74,53,0,0          ; vbroadcastss  0x354a(%rip),%ymm11        # 6854 <_sk_callback_avx+0x2f4>
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
-  DB  196,98,125,24,29,227,53,0,0         ; vbroadcastss  0x35e3(%rip),%ymm11        # 6a20 <_sk_callback_avx+0x2f8>
+  DB  196,98,125,24,29,64,53,0,0          ; vbroadcastss  0x3540(%rip),%ymm11        # 6858 <_sk_callback_avx+0x2f8>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,108,92,211                  ; vsubps        %ymm11,%ymm2,%ymm2
-  DB  196,98,125,24,29,212,53,0,0         ; vbroadcastss  0x35d4(%rip),%ymm11        # 6a24 <_sk_callback_avx+0x2fc>
+  DB  196,98,125,24,29,49,53,0,0          ; vbroadcastss  0x3531(%rip),%ymm11        # 685c <_sk_callback_avx+0x2fc>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,202,53,0,0         ; vbroadcastss  0x35ca(%rip),%ymm11        # 6a28 <_sk_callback_avx+0x300>
+  DB  196,98,125,24,29,39,53,0,0          ; vbroadcastss  0x3527(%rip),%ymm11        # 6860 <_sk_callback_avx+0x300>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,108,88,210                  ; vaddps        %ymm10,%ymm2,%ymm2
-  DB  196,98,125,24,21,187,53,0,0         ; vbroadcastss  0x35bb(%rip),%ymm10        # 6a2c <_sk_callback_avx+0x304>
+  DB  196,98,125,24,21,24,53,0,0          ; vbroadcastss  0x3518(%rip),%ymm10        # 6864 <_sk_callback_avx+0x304>
   DB  196,193,108,89,210                  ; vmulps        %ymm10,%ymm2,%ymm2
   DB  197,253,91,210                      ; vcvtps2dq     %ymm2,%ymm2
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -8239,7 +8153,7 @@
   DB  196,195,109,74,209,128              ; vblendvps     %ymm8,%ymm9,%ymm2,%ymm2
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,146,53,0,0          ; vbroadcastss  0x3592(%rip),%ymm8        # 6a30 <_sk_callback_avx+0x308>
+  DB  196,98,125,24,5,239,52,0,0          ; vbroadcastss  0x34ef(%rip),%ymm8        # 6868 <_sk_callback_avx+0x308>
   DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8259,36 +8173,36 @@
   DB  196,193,100,88,219                  ; vaddps        %ymm11,%ymm3,%ymm3
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,219                      ; vcvtdq2ps     %ymm3,%ymm11
-  DB  196,98,125,24,37,67,53,0,0          ; vbroadcastss  0x3543(%rip),%ymm12        # 6a34 <_sk_callback_avx+0x30c>
+  DB  196,98,125,24,37,160,52,0,0         ; vbroadcastss  0x34a0(%rip),%ymm12        # 686c <_sk_callback_avx+0x30c>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,57,53,0,0          ; vbroadcastss  0x3539(%rip),%ymm12        # 6a38 <_sk_callback_avx+0x310>
+  DB  196,98,125,24,37,150,52,0,0         ; vbroadcastss  0x3496(%rip),%ymm12        # 6870 <_sk_callback_avx+0x310>
   DB  196,193,100,84,220                  ; vandps        %ymm12,%ymm3,%ymm3
-  DB  196,98,125,24,37,47,53,0,0          ; vbroadcastss  0x352f(%rip),%ymm12        # 6a3c <_sk_callback_avx+0x314>
+  DB  196,98,125,24,37,140,52,0,0         ; vbroadcastss  0x348c(%rip),%ymm12        # 6874 <_sk_callback_avx+0x314>
   DB  196,193,100,86,220                  ; vorps         %ymm12,%ymm3,%ymm3
-  DB  196,98,125,24,37,37,53,0,0          ; vbroadcastss  0x3525(%rip),%ymm12        # 6a40 <_sk_callback_avx+0x318>
+  DB  196,98,125,24,37,130,52,0,0         ; vbroadcastss  0x3482(%rip),%ymm12        # 6878 <_sk_callback_avx+0x318>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,27,53,0,0          ; vbroadcastss  0x351b(%rip),%ymm12        # 6a44 <_sk_callback_avx+0x31c>
+  DB  196,98,125,24,37,120,52,0,0         ; vbroadcastss  0x3478(%rip),%ymm12        # 687c <_sk_callback_avx+0x31c>
   DB  196,65,100,89,228                   ; vmulps        %ymm12,%ymm3,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,12,53,0,0          ; vbroadcastss  0x350c(%rip),%ymm12        # 6a48 <_sk_callback_avx+0x320>
+  DB  196,98,125,24,37,105,52,0,0         ; vbroadcastss  0x3469(%rip),%ymm12        # 6880 <_sk_callback_avx+0x320>
   DB  196,193,100,88,220                  ; vaddps        %ymm12,%ymm3,%ymm3
-  DB  196,98,125,24,37,2,53,0,0           ; vbroadcastss  0x3502(%rip),%ymm12        # 6a4c <_sk_callback_avx+0x324>
+  DB  196,98,125,24,37,95,52,0,0          ; vbroadcastss  0x345f(%rip),%ymm12        # 6884 <_sk_callback_avx+0x324>
   DB  197,156,94,219                      ; vdivps        %ymm3,%ymm12,%ymm3
   DB  197,164,92,219                      ; vsubps        %ymm3,%ymm11,%ymm3
   DB  197,172,89,219                      ; vmulps        %ymm3,%ymm10,%ymm3
   DB  196,99,125,8,211,1                  ; vroundps      $0x1,%ymm3,%ymm10
   DB  196,65,100,92,210                   ; vsubps        %ymm10,%ymm3,%ymm10
-  DB  196,98,125,24,29,230,52,0,0         ; vbroadcastss  0x34e6(%rip),%ymm11        # 6a50 <_sk_callback_avx+0x328>
+  DB  196,98,125,24,29,67,52,0,0          ; vbroadcastss  0x3443(%rip),%ymm11        # 6888 <_sk_callback_avx+0x328>
   DB  196,193,100,88,219                  ; vaddps        %ymm11,%ymm3,%ymm3
-  DB  196,98,125,24,29,220,52,0,0         ; vbroadcastss  0x34dc(%rip),%ymm11        # 6a54 <_sk_callback_avx+0x32c>
+  DB  196,98,125,24,29,57,52,0,0          ; vbroadcastss  0x3439(%rip),%ymm11        # 688c <_sk_callback_avx+0x32c>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,100,92,219                  ; vsubps        %ymm11,%ymm3,%ymm3
-  DB  196,98,125,24,29,205,52,0,0         ; vbroadcastss  0x34cd(%rip),%ymm11        # 6a58 <_sk_callback_avx+0x330>
+  DB  196,98,125,24,29,42,52,0,0          ; vbroadcastss  0x342a(%rip),%ymm11        # 6890 <_sk_callback_avx+0x330>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,195,52,0,0         ; vbroadcastss  0x34c3(%rip),%ymm11        # 6a5c <_sk_callback_avx+0x334>
+  DB  196,98,125,24,29,32,52,0,0          ; vbroadcastss  0x3420(%rip),%ymm11        # 6894 <_sk_callback_avx+0x334>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,100,88,218                  ; vaddps        %ymm10,%ymm3,%ymm3
-  DB  196,98,125,24,21,180,52,0,0         ; vbroadcastss  0x34b4(%rip),%ymm10        # 6a60 <_sk_callback_avx+0x338>
+  DB  196,98,125,24,21,17,52,0,0          ; vbroadcastss  0x3411(%rip),%ymm10        # 6898 <_sk_callback_avx+0x338>
   DB  196,193,100,89,218                  ; vmulps        %ymm10,%ymm3,%ymm3
   DB  197,253,91,219                      ; vcvtps2dq     %ymm3,%ymm3
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -8296,38 +8210,38 @@
   DB  196,195,101,74,217,128              ; vblendvps     %ymm8,%ymm9,%ymm3,%ymm3
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,100,95,216                  ; vmaxps        %ymm8,%ymm3,%ymm3
-  DB  196,98,125,24,5,139,52,0,0          ; vbroadcastss  0x348b(%rip),%ymm8        # 6a64 <_sk_callback_avx+0x33c>
+  DB  196,98,125,24,5,232,51,0,0          ; vbroadcastss  0x33e8(%rip),%ymm8        # 689c <_sk_callback_avx+0x33c>
   DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_lab_to_xyz_avx
 _sk_lab_to_xyz_avx LABEL PROC
-  DB  196,98,125,24,5,125,52,0,0          ; vbroadcastss  0x347d(%rip),%ymm8        # 6a68 <_sk_callback_avx+0x340>
+  DB  196,98,125,24,5,218,51,0,0          ; vbroadcastss  0x33da(%rip),%ymm8        # 68a0 <_sk_callback_avx+0x340>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,115,52,0,0          ; vbroadcastss  0x3473(%rip),%ymm8        # 6a6c <_sk_callback_avx+0x344>
+  DB  196,98,125,24,5,208,51,0,0          ; vbroadcastss  0x33d0(%rip),%ymm8        # 68a4 <_sk_callback_avx+0x344>
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
-  DB  196,98,125,24,13,105,52,0,0         ; vbroadcastss  0x3469(%rip),%ymm9        # 6a70 <_sk_callback_avx+0x348>
+  DB  196,98,125,24,13,198,51,0,0         ; vbroadcastss  0x33c6(%rip),%ymm9        # 68a8 <_sk_callback_avx+0x348>
   DB  196,193,116,88,201                  ; vaddps        %ymm9,%ymm1,%ymm1
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  196,193,108,88,209                  ; vaddps        %ymm9,%ymm2,%ymm2
-  DB  196,98,125,24,5,85,52,0,0           ; vbroadcastss  0x3455(%rip),%ymm8        # 6a74 <_sk_callback_avx+0x34c>
+  DB  196,98,125,24,5,178,51,0,0          ; vbroadcastss  0x33b2(%rip),%ymm8        # 68ac <_sk_callback_avx+0x34c>
   DB  196,193,124,88,192                  ; vaddps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,75,52,0,0           ; vbroadcastss  0x344b(%rip),%ymm8        # 6a78 <_sk_callback_avx+0x350>
+  DB  196,98,125,24,5,168,51,0,0          ; vbroadcastss  0x33a8(%rip),%ymm8        # 68b0 <_sk_callback_avx+0x350>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,65,52,0,0           ; vbroadcastss  0x3441(%rip),%ymm8        # 6a7c <_sk_callback_avx+0x354>
+  DB  196,98,125,24,5,158,51,0,0          ; vbroadcastss  0x339e(%rip),%ymm8        # 68b4 <_sk_callback_avx+0x354>
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
   DB  197,252,88,201                      ; vaddps        %ymm1,%ymm0,%ymm1
-  DB  196,98,125,24,5,51,52,0,0           ; vbroadcastss  0x3433(%rip),%ymm8        # 6a80 <_sk_callback_avx+0x358>
+  DB  196,98,125,24,5,144,51,0,0          ; vbroadcastss  0x3390(%rip),%ymm8        # 68b8 <_sk_callback_avx+0x358>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  197,252,92,210                      ; vsubps        %ymm2,%ymm0,%ymm2
   DB  197,116,89,193                      ; vmulps        %ymm1,%ymm1,%ymm8
   DB  196,65,116,89,192                   ; vmulps        %ymm8,%ymm1,%ymm8
-  DB  196,98,125,24,13,28,52,0,0          ; vbroadcastss  0x341c(%rip),%ymm9        # 6a84 <_sk_callback_avx+0x35c>
+  DB  196,98,125,24,13,121,51,0,0         ; vbroadcastss  0x3379(%rip),%ymm9        # 68bc <_sk_callback_avx+0x35c>
   DB  196,65,52,194,208,1                 ; vcmpltps      %ymm8,%ymm9,%ymm10
-  DB  196,98,125,24,29,17,52,0,0          ; vbroadcastss  0x3411(%rip),%ymm11        # 6a88 <_sk_callback_avx+0x360>
+  DB  196,98,125,24,29,110,51,0,0         ; vbroadcastss  0x336e(%rip),%ymm11        # 68c0 <_sk_callback_avx+0x360>
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,37,7,52,0,0           ; vbroadcastss  0x3407(%rip),%ymm12        # 6a8c <_sk_callback_avx+0x364>
+  DB  196,98,125,24,37,100,51,0,0         ; vbroadcastss  0x3364(%rip),%ymm12        # 68c4 <_sk_callback_avx+0x364>
   DB  196,193,116,89,204                  ; vmulps        %ymm12,%ymm1,%ymm1
   DB  196,67,117,74,192,160               ; vblendvps     %ymm10,%ymm8,%ymm1,%ymm8
   DB  197,252,89,200                      ; vmulps        %ymm0,%ymm0,%ymm1
@@ -8342,9 +8256,9 @@
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
   DB  196,193,108,89,212                  ; vmulps        %ymm12,%ymm2,%ymm2
   DB  196,227,109,74,208,144              ; vblendvps     %ymm9,%ymm0,%ymm2,%ymm2
-  DB  196,226,125,24,5,189,51,0,0         ; vbroadcastss  0x33bd(%rip),%ymm0        # 6a90 <_sk_callback_avx+0x368>
+  DB  196,226,125,24,5,26,51,0,0          ; vbroadcastss  0x331a(%rip),%ymm0        # 68c8 <_sk_callback_avx+0x368>
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
-  DB  196,98,125,24,5,180,51,0,0          ; vbroadcastss  0x33b4(%rip),%ymm8        # 6a94 <_sk_callback_avx+0x36c>
+  DB  196,98,125,24,5,17,51,0,0           ; vbroadcastss  0x3311(%rip),%ymm8        # 68cc <_sk_callback_avx+0x36c>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8356,14 +8270,14 @@
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,1,248                            ; add           %rdi,%rax
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,62                              ; jne           3737 <_sk_load_a8_avx+0x4e>
+  DB  117,62                              ; jne           3612 <_sk_load_a8_avx+0x4e>
   DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
   DB  196,226,121,49,200                  ; vpmovzxbd     %xmm0,%xmm1
   DB  196,227,121,4,192,229               ; vpermilps     $0xe5,%xmm0,%xmm0
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,120,51,0,0        ; vbroadcastss  0x3378(%rip),%ymm1        # 6a98 <_sk_callback_avx+0x370>
+  DB  196,226,125,24,13,213,50,0,0        ; vbroadcastss  0x32d5(%rip),%ymm1        # 68d0 <_sk_callback_avx+0x370>
   DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
@@ -8380,9 +8294,9 @@
   DB  77,9,217                            ; or            %r11,%r9
   DB  72,131,193,8                        ; add           $0x8,%rcx
   DB  73,255,202                          ; dec           %r10
-  DB  117,234                             ; jne           373f <_sk_load_a8_avx+0x56>
+  DB  117,234                             ; jne           361a <_sk_load_a8_avx+0x56>
   DB  196,193,249,110,193                 ; vmovq         %r9,%xmm0
-  DB  235,161                             ; jmp           36fd <_sk_load_a8_avx+0x14>
+  DB  235,161                             ; jmp           35d8 <_sk_load_a8_avx+0x14>
 
 PUBLIC _sk_gather_a8_avx
 _sk_gather_a8_avx LABEL PROC
@@ -8430,7 +8344,7 @@
   DB  196,226,121,49,201                  ; vpmovzxbd     %xmm1,%xmm1
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,109,50,0,0        ; vbroadcastss  0x326d(%rip),%ymm1        # 6a9c <_sk_callback_avx+0x374>
+  DB  196,226,125,24,13,202,49,0,0        ; vbroadcastss  0x31ca(%rip),%ymm1        # 68d4 <_sk_callback_avx+0x374>
   DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
@@ -8446,14 +8360,14 @@
 _sk_store_a8_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,72,50,0,0           ; vbroadcastss  0x3248(%rip),%ymm8        # 6aa0 <_sk_callback_avx+0x378>
+  DB  196,98,125,24,5,165,49,0,0          ; vbroadcastss  0x31a5(%rip),%ymm8        # 68d8 <_sk_callback_avx+0x378>
   DB  196,65,100,89,192                   ; vmulps        %ymm8,%ymm3,%ymm8
   DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
   DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
   DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
   DB  196,65,57,103,192                   ; vpackuswb     %xmm8,%xmm8,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,10                              ; jne           3881 <_sk_store_a8_avx+0x37>
+  DB  117,10                              ; jne           375c <_sk_store_a8_avx+0x37>
   DB  196,65,123,17,4,58                  ; vmovsd        %xmm8,(%r10,%rdi,1)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8461,10 +8375,10 @@
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,236                             ; ja            387d <_sk_store_a8_avx+0x33>
+  DB  119,236                             ; ja            3758 <_sk_store_a8_avx+0x33>
   DB  196,66,121,48,192                   ; vpmovzxbw     %xmm8,%xmm8
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,67,0,0,0                  ; lea           0x43(%rip),%r9        # 38e4 <_sk_store_a8_avx+0x9a>
+  DB  76,141,13,68,0,0,0                  ; lea           0x44(%rip),%r9        # 37c0 <_sk_store_a8_avx+0x9b>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8475,27 +8389,28 @@
   DB  196,67,121,20,68,58,2,4             ; vpextrb       $0x4,%xmm8,0x2(%r10,%rdi,1)
   DB  196,67,121,20,68,58,1,2             ; vpextrb       $0x2,%xmm8,0x1(%r10,%rdi,1)
   DB  196,67,121,20,4,58,0                ; vpextrb       $0x0,%xmm8,(%r10,%rdi,1)
-  DB  235,154                             ; jmp           387d <_sk_store_a8_avx+0x33>
-  DB  144                                 ; nop
-  DB  246,255                             ; idiv          %bh
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  238                                 ; out           %al,(%dx)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,230                             ; jmpq          *%rsi
+  DB  235,154                             ; jmp           3758 <_sk_store_a8_avx+0x33>
+  DB  102,144                             ; xchg          %ax,%ax
+  DB  245                                 ; cmc
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  222,255                             ; fdivrp        %st,%st(7)
-  DB  255                                 ; (bad)
-  DB  255,214                             ; callq         *%rsi
+  DB  237                                 ; in            (%dx),%eax
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,206                             ; dec           %esi
+  DB  255,229                             ; jmpq          *%rbp
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,198                             ; inc           %esi
+  DB  255                                 ; (bad)
+  DB  221,255                             ; (bad)
+  DB  255                                 ; (bad)
+  DB  255,213                             ; callq         *%rbp
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255,205                             ; dec           %ebp
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255,197                             ; inc           %ebp
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; .byte         0xff
@@ -8507,17 +8422,17 @@
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,1,248                            ; add           %rdi,%rax
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,67                              ; jne           3953 <_sk_load_g8_avx+0x53>
+  DB  117,67                              ; jne           382f <_sk_load_g8_avx+0x53>
   DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
   DB  196,226,121,49,200                  ; vpmovzxbd     %xmm0,%xmm1
   DB  196,227,121,4,192,229               ; vpermilps     $0xe5,%xmm0,%xmm0
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,109,49,0,0        ; vbroadcastss  0x316d(%rip),%ymm1        # 6aa4 <_sk_callback_avx+0x37c>
+  DB  196,226,125,24,13,201,48,0,0        ; vbroadcastss  0x30c9(%rip),%ymm1        # 68dc <_sk_callback_avx+0x37c>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,98,49,0,0         ; vbroadcastss  0x3162(%rip),%ymm3        # 6aa8 <_sk_callback_avx+0x380>
+  DB  196,226,125,24,29,190,48,0,0        ; vbroadcastss  0x30be(%rip),%ymm3        # 68e0 <_sk_callback_avx+0x380>
   DB  76,137,193                          ; mov           %r8,%rcx
   DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
@@ -8531,9 +8446,9 @@
   DB  77,9,217                            ; or            %r11,%r9
   DB  72,131,193,8                        ; add           $0x8,%rcx
   DB  73,255,202                          ; dec           %r10
-  DB  117,234                             ; jne           395b <_sk_load_g8_avx+0x5b>
+  DB  117,234                             ; jne           3837 <_sk_load_g8_avx+0x5b>
   DB  196,193,249,110,193                 ; vmovq         %r9,%xmm0
-  DB  235,156                             ; jmp           3914 <_sk_load_g8_avx+0x14>
+  DB  235,156                             ; jmp           37f0 <_sk_load_g8_avx+0x14>
 
 PUBLIC _sk_gather_g8_avx
 _sk_gather_g8_avx LABEL PROC
@@ -8581,10 +8496,10 @@
   DB  196,226,121,49,201                  ; vpmovzxbd     %xmm1,%xmm1
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,97,48,0,0         ; vbroadcastss  0x3061(%rip),%ymm1        # 6aac <_sk_callback_avx+0x384>
+  DB  196,226,125,24,13,189,47,0,0        ; vbroadcastss  0x2fbd(%rip),%ymm1        # 68e4 <_sk_callback_avx+0x384>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,86,48,0,0         ; vbroadcastss  0x3056(%rip),%ymm3        # 6ab0 <_sk_callback_avx+0x388>
+  DB  196,226,125,24,29,178,47,0,0        ; vbroadcastss  0x2fb2(%rip),%ymm3        # 68e8 <_sk_callback_avx+0x388>
   DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
   DB  91                                  ; pop           %rbx
@@ -8598,9 +8513,9 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  73,137,192                          ; mov           %rax,%r8
   DB  77,133,192                          ; test          %r8,%r8
-  DB  116,5                               ; je            3a7a <_sk_gather_i8_avx+0xf>
+  DB  116,5                               ; je            3956 <_sk_gather_i8_avx+0xf>
   DB  76,137,192                          ; mov           %r8,%rax
-  DB  235,2                               ; jmp           3a7c <_sk_gather_i8_avx+0x11>
+  DB  235,2                               ; jmp           3958 <_sk_gather_i8_avx+0x11>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  65,87                               ; push          %r15
   DB  65,86                               ; push          %r14
@@ -8662,10 +8577,10 @@
   DB  196,163,121,34,4,163,2              ; vpinsrd       $0x2,(%rbx,%r12,4),%xmm0,%xmm0
   DB  196,163,121,34,28,19,3              ; vpinsrd       $0x3,(%rbx,%r10,1),%xmm0,%xmm3
   DB  196,227,61,24,195,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  DB  197,124,40,21,26,49,0,0             ; vmovaps       0x311a(%rip),%ymm10        # 6cc0 <_sk_callback_avx+0x598>
+  DB  197,124,40,21,30,49,0,0             ; vmovaps       0x311e(%rip),%ymm10        # 6ba0 <_sk_callback_avx+0x640>
   DB  196,193,124,84,194                  ; vandps        %ymm10,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,252,46,0,0         ; vbroadcastss  0x2efc(%rip),%ymm9        # 6ab4 <_sk_callback_avx+0x38c>
+  DB  196,98,125,24,13,88,46,0,0          ; vbroadcastss  0x2e58(%rip),%ymm9        # 68ec <_sk_callback_avx+0x38c>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  196,193,113,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm1
   DB  197,233,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm2
@@ -8697,38 +8612,38 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,128,0,0,0                    ; jne           3cb0 <_sk_load_565_avx+0x8e>
+  DB  15,133,128,0,0,0                    ; jne           3b8c <_sk_load_565_avx+0x8e>
   DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
   DB  197,241,239,201                     ; vpxor         %xmm1,%xmm1,%xmm1
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,209,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  DB  196,226,125,24,5,102,46,0,0         ; vbroadcastss  0x2e66(%rip),%ymm0        # 6ab8 <_sk_callback_avx+0x390>
+  DB  196,226,125,24,5,194,45,0,0         ; vbroadcastss  0x2dc2(%rip),%ymm0        # 68f0 <_sk_callback_avx+0x390>
   DB  197,236,84,192                      ; vandps        %ymm0,%ymm2,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,89,46,0,0         ; vbroadcastss  0x2e59(%rip),%ymm1        # 6abc <_sk_callback_avx+0x394>
+  DB  196,226,125,24,13,181,45,0,0        ; vbroadcastss  0x2db5(%rip),%ymm1        # 68f4 <_sk_callback_avx+0x394>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,80,46,0,0         ; vbroadcastss  0x2e50(%rip),%ymm1        # 6ac0 <_sk_callback_avx+0x398>
+  DB  196,226,125,24,13,172,45,0,0        ; vbroadcastss  0x2dac(%rip),%ymm1        # 68f8 <_sk_callback_avx+0x398>
   DB  197,236,84,201                      ; vandps        %ymm1,%ymm2,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,29,67,46,0,0         ; vbroadcastss  0x2e43(%rip),%ymm3        # 6ac4 <_sk_callback_avx+0x39c>
+  DB  196,226,125,24,29,159,45,0,0        ; vbroadcastss  0x2d9f(%rip),%ymm3        # 68fc <_sk_callback_avx+0x39c>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
-  DB  196,226,125,24,29,58,46,0,0         ; vbroadcastss  0x2e3a(%rip),%ymm3        # 6ac8 <_sk_callback_avx+0x3a0>
+  DB  196,226,125,24,29,150,45,0,0        ; vbroadcastss  0x2d96(%rip),%ymm3        # 6900 <_sk_callback_avx+0x3a0>
   DB  197,236,84,211                      ; vandps        %ymm3,%ymm2,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,226,125,24,29,45,46,0,0         ; vbroadcastss  0x2e2d(%rip),%ymm3        # 6acc <_sk_callback_avx+0x3a4>
+  DB  196,226,125,24,29,137,45,0,0        ; vbroadcastss  0x2d89(%rip),%ymm3        # 6904 <_sk_callback_avx+0x3a4>
   DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,34,46,0,0         ; vbroadcastss  0x2e22(%rip),%ymm3        # 6ad0 <_sk_callback_avx+0x3a8>
+  DB  196,226,125,24,29,126,45,0,0        ; vbroadcastss  0x2d7e(%rip),%ymm3        # 6908 <_sk_callback_avx+0x3a8>
   DB  255,224                             ; jmpq          *%rax
   DB  65,137,200                          ; mov           %ecx,%r8d
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  15,135,110,255,255,255              ; ja            3c36 <_sk_load_565_avx+0x14>
+  DB  15,135,110,255,255,255              ; ja            3b12 <_sk_load_565_avx+0x14>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 3d1c <_sk_load_565_avx+0xfa>
+  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 3bf8 <_sk_load_565_avx+0xfa>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8740,7 +8655,7 @@
   DB  196,193,121,196,68,122,4,2          ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,68,122,2,1          ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,4,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  DB  233,26,255,255,255                  ; jmpq          3c36 <_sk_load_565_avx+0x14>
+  DB  233,26,255,255,255                  ; jmpq          3b12 <_sk_load_565_avx+0x14>
   DB  244                                 ; hlt
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
@@ -8816,23 +8731,23 @@
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,209,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  DB  196,226,125,24,5,194,44,0,0         ; vbroadcastss  0x2cc2(%rip),%ymm0        # 6ad4 <_sk_callback_avx+0x3ac>
+  DB  196,226,125,24,5,30,44,0,0          ; vbroadcastss  0x2c1e(%rip),%ymm0        # 690c <_sk_callback_avx+0x3ac>
   DB  197,236,84,192                      ; vandps        %ymm0,%ymm2,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,181,44,0,0        ; vbroadcastss  0x2cb5(%rip),%ymm1        # 6ad8 <_sk_callback_avx+0x3b0>
+  DB  196,226,125,24,13,17,44,0,0         ; vbroadcastss  0x2c11(%rip),%ymm1        # 6910 <_sk_callback_avx+0x3b0>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,172,44,0,0        ; vbroadcastss  0x2cac(%rip),%ymm1        # 6adc <_sk_callback_avx+0x3b4>
+  DB  196,226,125,24,13,8,44,0,0          ; vbroadcastss  0x2c08(%rip),%ymm1        # 6914 <_sk_callback_avx+0x3b4>
   DB  197,236,84,201                      ; vandps        %ymm1,%ymm2,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,29,159,44,0,0        ; vbroadcastss  0x2c9f(%rip),%ymm3        # 6ae0 <_sk_callback_avx+0x3b8>
+  DB  196,226,125,24,29,251,43,0,0        ; vbroadcastss  0x2bfb(%rip),%ymm3        # 6918 <_sk_callback_avx+0x3b8>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
-  DB  196,226,125,24,29,150,44,0,0        ; vbroadcastss  0x2c96(%rip),%ymm3        # 6ae4 <_sk_callback_avx+0x3bc>
+  DB  196,226,125,24,29,242,43,0,0        ; vbroadcastss  0x2bf2(%rip),%ymm3        # 691c <_sk_callback_avx+0x3bc>
   DB  197,236,84,211                      ; vandps        %ymm3,%ymm2,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,226,125,24,29,137,44,0,0        ; vbroadcastss  0x2c89(%rip),%ymm3        # 6ae8 <_sk_callback_avx+0x3c0>
+  DB  196,226,125,24,29,229,43,0,0        ; vbroadcastss  0x2be5(%rip),%ymm3        # 6920 <_sk_callback_avx+0x3c0>
   DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,126,44,0,0        ; vbroadcastss  0x2c7e(%rip),%ymm3        # 6aec <_sk_callback_avx+0x3c4>
+  DB  196,226,125,24,29,218,43,0,0        ; vbroadcastss  0x2bda(%rip),%ymm3        # 6924 <_sk_callback_avx+0x3c4>
   DB  91                                  ; pop           %rbx
   DB  65,92                               ; pop           %r12
   DB  65,94                               ; pop           %r14
@@ -8844,14 +8759,14 @@
 _sk_store_565_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,106,44,0,0          ; vbroadcastss  0x2c6a(%rip),%ymm8        # 6af0 <_sk_callback_avx+0x3c8>
+  DB  196,98,125,24,5,198,43,0,0          ; vbroadcastss  0x2bc6(%rip),%ymm8        # 6928 <_sk_callback_avx+0x3c8>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,193,41,114,241,11               ; vpslld        $0xb,%xmm9,%xmm10
   DB  196,67,125,25,201,1                 ; vextractf128  $0x1,%ymm9,%xmm9
   DB  196,193,49,114,241,11               ; vpslld        $0xb,%xmm9,%xmm9
   DB  196,67,45,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
-  DB  196,98,125,24,21,67,44,0,0          ; vbroadcastss  0x2c43(%rip),%ymm10        # 6af4 <_sk_callback_avx+0x3cc>
+  DB  196,98,125,24,21,159,43,0,0         ; vbroadcastss  0x2b9f(%rip),%ymm10        # 692c <_sk_callback_avx+0x3cc>
   DB  196,65,116,89,210                   ; vmulps        %ymm10,%ymm1,%ymm10
   DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
   DB  196,193,33,114,242,5                ; vpslld        $0x5,%xmm10,%xmm11
@@ -8865,7 +8780,7 @@
   DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
   DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,10                              ; jne           3f01 <_sk_store_565_avx+0x89>
+  DB  117,10                              ; jne           3ddd <_sk_store_565_avx+0x89>
   DB  196,65,122,127,4,122                ; vmovdqu       %xmm8,(%r10,%rdi,2)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8873,9 +8788,9 @@
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,236                             ; ja            3efd <_sk_store_565_avx+0x85>
+  DB  119,236                             ; ja            3dd9 <_sk_store_565_avx+0x85>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,68,0,0,0                  ; lea           0x44(%rip),%r9        # 3f60 <_sk_store_565_avx+0xe8>
+  DB  76,141,13,68,0,0,0                  ; lea           0x44(%rip),%r9        # 3e3c <_sk_store_565_avx+0xe8>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8886,7 +8801,7 @@
   DB  196,67,121,21,68,122,4,2            ; vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   DB  196,67,121,21,68,122,2,1            ; vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   DB  196,67,121,21,4,122,0               ; vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  DB  235,159                             ; jmp           3efd <_sk_store_565_avx+0x85>
+  DB  235,159                             ; jmp           3dd9 <_sk_store_565_avx+0x85>
   DB  102,144                             ; xchg          %ax,%ax
   DB  245                                 ; cmc
   DB  255                                 ; (bad)
@@ -8917,31 +8832,31 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,152,0,0,0                    ; jne           4022 <_sk_load_4444_avx+0xa6>
+  DB  15,133,152,0,0,0                    ; jne           3efe <_sk_load_4444_avx+0xa6>
   DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
   DB  197,241,239,201                     ; vpxor         %xmm1,%xmm1,%xmm1
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,217,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  DB  196,226,125,24,5,76,43,0,0          ; vbroadcastss  0x2b4c(%rip),%ymm0        # 6af8 <_sk_callback_avx+0x3d0>
+  DB  196,226,125,24,5,168,42,0,0         ; vbroadcastss  0x2aa8(%rip),%ymm0        # 6930 <_sk_callback_avx+0x3d0>
   DB  197,228,84,192                      ; vandps        %ymm0,%ymm3,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,63,43,0,0         ; vbroadcastss  0x2b3f(%rip),%ymm1        # 6afc <_sk_callback_avx+0x3d4>
+  DB  196,226,125,24,13,155,42,0,0        ; vbroadcastss  0x2a9b(%rip),%ymm1        # 6934 <_sk_callback_avx+0x3d4>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,54,43,0,0         ; vbroadcastss  0x2b36(%rip),%ymm1        # 6b00 <_sk_callback_avx+0x3d8>
+  DB  196,226,125,24,13,146,42,0,0        ; vbroadcastss  0x2a92(%rip),%ymm1        # 6938 <_sk_callback_avx+0x3d8>
   DB  197,228,84,201                      ; vandps        %ymm1,%ymm3,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,21,41,43,0,0         ; vbroadcastss  0x2b29(%rip),%ymm2        # 6b04 <_sk_callback_avx+0x3dc>
+  DB  196,226,125,24,21,133,42,0,0        ; vbroadcastss  0x2a85(%rip),%ymm2        # 693c <_sk_callback_avx+0x3dc>
   DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
-  DB  196,226,125,24,21,32,43,0,0         ; vbroadcastss  0x2b20(%rip),%ymm2        # 6b08 <_sk_callback_avx+0x3e0>
+  DB  196,226,125,24,21,124,42,0,0        ; vbroadcastss  0x2a7c(%rip),%ymm2        # 6940 <_sk_callback_avx+0x3e0>
   DB  197,228,84,210                      ; vandps        %ymm2,%ymm3,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,98,125,24,5,19,43,0,0           ; vbroadcastss  0x2b13(%rip),%ymm8        # 6b0c <_sk_callback_avx+0x3e4>
+  DB  196,98,125,24,5,111,42,0,0          ; vbroadcastss  0x2a6f(%rip),%ymm8        # 6944 <_sk_callback_avx+0x3e4>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,9,43,0,0            ; vbroadcastss  0x2b09(%rip),%ymm8        # 6b10 <_sk_callback_avx+0x3e8>
+  DB  196,98,125,24,5,101,42,0,0          ; vbroadcastss  0x2a65(%rip),%ymm8        # 6948 <_sk_callback_avx+0x3e8>
   DB  196,193,100,84,216                  ; vandps        %ymm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,251,42,0,0          ; vbroadcastss  0x2afb(%rip),%ymm8        # 6b14 <_sk_callback_avx+0x3ec>
+  DB  196,98,125,24,5,87,42,0,0           ; vbroadcastss  0x2a57(%rip),%ymm8        # 694c <_sk_callback_avx+0x3ec>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8950,9 +8865,9 @@
   DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  15,135,86,255,255,255               ; ja            3f90 <_sk_load_4444_avx+0x14>
+  DB  15,135,86,255,255,255               ; ja            3e6c <_sk_load_4444_avx+0x14>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,75,0,0,0                  ; lea           0x4b(%rip),%r9        # 4090 <_sk_load_4444_avx+0x114>
+  DB  76,141,13,75,0,0,0                  ; lea           0x4b(%rip),%r9        # 3f6c <_sk_load_4444_avx+0x114>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8964,7 +8879,7 @@
   DB  196,193,121,196,68,122,4,2          ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,68,122,2,1          ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,4,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  DB  233,2,255,255,255                   ; jmpq          3f90 <_sk_load_4444_avx+0x14>
+  DB  233,2,255,255,255                   ; jmpq          3e6c <_sk_load_4444_avx+0x14>
   DB  102,144                             ; xchg          %ax,%ax
   DB  242,255                             ; repnz         (bad)
   DB  255                                 ; (bad)
@@ -9041,25 +8956,25 @@
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,217,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  DB  196,226,125,24,5,146,41,0,0         ; vbroadcastss  0x2992(%rip),%ymm0        # 6b18 <_sk_callback_avx+0x3f0>
+  DB  196,226,125,24,5,238,40,0,0         ; vbroadcastss  0x28ee(%rip),%ymm0        # 6950 <_sk_callback_avx+0x3f0>
   DB  197,228,84,192                      ; vandps        %ymm0,%ymm3,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,133,41,0,0        ; vbroadcastss  0x2985(%rip),%ymm1        # 6b1c <_sk_callback_avx+0x3f4>
+  DB  196,226,125,24,13,225,40,0,0        ; vbroadcastss  0x28e1(%rip),%ymm1        # 6954 <_sk_callback_avx+0x3f4>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,124,41,0,0        ; vbroadcastss  0x297c(%rip),%ymm1        # 6b20 <_sk_callback_avx+0x3f8>
+  DB  196,226,125,24,13,216,40,0,0        ; vbroadcastss  0x28d8(%rip),%ymm1        # 6958 <_sk_callback_avx+0x3f8>
   DB  197,228,84,201                      ; vandps        %ymm1,%ymm3,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,21,111,41,0,0        ; vbroadcastss  0x296f(%rip),%ymm2        # 6b24 <_sk_callback_avx+0x3fc>
+  DB  196,226,125,24,21,203,40,0,0        ; vbroadcastss  0x28cb(%rip),%ymm2        # 695c <_sk_callback_avx+0x3fc>
   DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
-  DB  196,226,125,24,21,102,41,0,0        ; vbroadcastss  0x2966(%rip),%ymm2        # 6b28 <_sk_callback_avx+0x400>
+  DB  196,226,125,24,21,194,40,0,0        ; vbroadcastss  0x28c2(%rip),%ymm2        # 6960 <_sk_callback_avx+0x400>
   DB  197,228,84,210                      ; vandps        %ymm2,%ymm3,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,98,125,24,5,89,41,0,0           ; vbroadcastss  0x2959(%rip),%ymm8        # 6b2c <_sk_callback_avx+0x404>
+  DB  196,98,125,24,5,181,40,0,0          ; vbroadcastss  0x28b5(%rip),%ymm8        # 6964 <_sk_callback_avx+0x404>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,79,41,0,0           ; vbroadcastss  0x294f(%rip),%ymm8        # 6b30 <_sk_callback_avx+0x408>
+  DB  196,98,125,24,5,171,40,0,0          ; vbroadcastss  0x28ab(%rip),%ymm8        # 6968 <_sk_callback_avx+0x408>
   DB  196,193,100,84,216                  ; vandps        %ymm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,65,41,0,0           ; vbroadcastss  0x2941(%rip),%ymm8        # 6b34 <_sk_callback_avx+0x40c>
+  DB  196,98,125,24,5,157,40,0,0          ; vbroadcastss  0x289d(%rip),%ymm8        # 696c <_sk_callback_avx+0x40c>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  91                                  ; pop           %rbx
@@ -9073,7 +8988,7 @@
 _sk_store_4444_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,38,41,0,0           ; vbroadcastss  0x2926(%rip),%ymm8        # 6b38 <_sk_callback_avx+0x410>
+  DB  196,98,125,24,5,130,40,0,0          ; vbroadcastss  0x2882(%rip),%ymm8        # 6970 <_sk_callback_avx+0x410>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,193,41,114,241,12               ; vpslld        $0xc,%xmm9,%xmm10
@@ -9100,7 +9015,7 @@
   DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
   DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,10                              ; jne           42ab <_sk_store_4444_avx+0xa7>
+  DB  117,10                              ; jne           4187 <_sk_store_4444_avx+0xa7>
   DB  196,65,122,127,4,122                ; vmovdqu       %xmm8,(%r10,%rdi,2)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -9108,9 +9023,9 @@
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,236                             ; ja            42a7 <_sk_store_4444_avx+0xa3>
+  DB  119,236                             ; ja            4183 <_sk_store_4444_avx+0xa3>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,66,0,0,0                  ; lea           0x42(%rip),%r9        # 4308 <_sk_store_4444_avx+0x104>
+  DB  76,141,13,66,0,0,0                  ; lea           0x42(%rip),%r9        # 41e4 <_sk_store_4444_avx+0x104>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -9121,7 +9036,7 @@
   DB  196,67,121,21,68,122,4,2            ; vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   DB  196,67,121,21,68,122,2,1            ; vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   DB  196,67,121,21,4,122,0               ; vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  DB  235,159                             ; jmp           42a7 <_sk_store_4444_avx+0xa3>
+  DB  235,159                             ; jmp           4183 <_sk_store_4444_avx+0xa3>
   DB  247,255                             ; idiv          %edi
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
@@ -9147,87 +9062,55 @@
 
 PUBLIC _sk_load_8888_avx
 _sk_load_8888_avx LABEL PROC
+  DB  80                                  ; push          %rax
+  DB  73,137,200                          ; mov           %rcx,%r8
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  76,139,16                           ; mov           (%rax),%r10
-  DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,135,0,0,0                    ; jne           43b9 <_sk_load_8888_avx+0x95>
-  DB  196,65,124,16,12,186                ; vmovups       (%r10,%rdi,4),%ymm9
-  DB  197,124,40,21,160,41,0,0            ; vmovaps       0x29a0(%rip),%ymm10        # 6ce0 <_sk_callback_avx+0x5b8>
-  DB  196,193,52,84,194                   ; vandps        %ymm10,%ymm9,%ymm0
+  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
+  DB  76,3,8                              ; add           (%rax),%r9
+  DB  77,133,192                          ; test          %r8,%r8
+  DB  15,133,139,0,0,0                    ; jne           42a5 <_sk_load_8888_avx+0xa5>
+  DB  196,193,124,16,25                   ; vmovups       (%r9),%ymm3
+  DB  197,124,40,21,153,41,0,0            ; vmovaps       0x2999(%rip),%ymm10        # 6bc0 <_sk_callback_avx+0x660>
+  DB  196,193,100,84,194                  ; vandps        %ymm10,%ymm3,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,5,234,39,0,0          ; vbroadcastss  0x27ea(%rip),%ymm8        # 6b3c <_sk_callback_avx+0x414>
+  DB  196,98,125,24,5,59,39,0,0           ; vbroadcastss  0x273b(%rip),%ymm8        # 6974 <_sk_callback_avx+0x414>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,193,113,114,209,8               ; vpsrld        $0x8,%xmm9,%xmm1
-  DB  196,99,125,25,203,1                 ; vextractf128  $0x1,%ymm9,%xmm3
-  DB  197,233,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm2
+  DB  197,241,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm1
+  DB  196,195,125,25,217,1                ; vextractf128  $0x1,%ymm3,%xmm9
+  DB  196,193,105,114,209,8               ; vpsrld        $0x8,%xmm9,%xmm2
   DB  196,227,117,24,202,1                ; vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   DB  196,193,116,84,202                  ; vandps        %ymm10,%ymm1,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
-  DB  196,193,33,114,209,16               ; vpsrld        $0x10,%xmm9,%xmm11
-  DB  197,233,114,211,16                  ; vpsrld        $0x10,%xmm3,%xmm2
+  DB  197,161,114,211,16                  ; vpsrld        $0x10,%xmm3,%xmm11
+  DB  196,193,105,114,209,16              ; vpsrld        $0x10,%xmm9,%xmm2
   DB  196,227,37,24,210,1                 ; vinsertf128   $0x1,%xmm2,%ymm11,%ymm2
   DB  196,193,108,84,210                  ; vandps        %ymm10,%ymm2,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
-  DB  196,193,49,114,209,24               ; vpsrld        $0x18,%xmm9,%xmm9
-  DB  197,225,114,211,24                  ; vpsrld        $0x18,%xmm3,%xmm3
-  DB  196,227,53,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm9,%ymm3
+  DB  197,169,114,211,24                  ; vpsrld        $0x18,%xmm3,%xmm10
+  DB  196,193,97,114,209,24               ; vpsrld        $0x18,%xmm9,%xmm3
+  DB  196,227,45,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm10,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  76,137,193                          ; mov           %r8,%rcx
+  DB  65,88                               ; pop           %r8
   DB  255,224                             ; jmpq          *%rax
-  DB  65,137,200                          ; mov           %ecx,%r8d
-  DB  65,128,224,7                        ; and           $0x7,%r8b
-  DB  196,65,52,87,201                    ; vxorps        %ymm9,%ymm9,%ymm9
-  DB  65,254,200                          ; dec           %r8b
-  DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  15,135,102,255,255,255              ; ja            4338 <_sk_load_8888_avx+0x14>
-  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,139,0,0,0                 ; lea           0x8b(%rip),%r9        # 4468 <_sk_load_8888_avx+0x144>
-  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
-  DB  76,1,200                            ; add           %r9,%rax
-  DB  255,224                             ; jmpq          *%rax
-  DB  196,193,121,110,68,186,24           ; vmovd         0x18(%r10,%rdi,4),%xmm0
-  DB  197,249,112,192,68                  ; vpshufd       $0x44,%xmm0,%xmm0
-  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
-  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
-  DB  196,99,117,12,200,64                ; vblendps      $0x40,%ymm0,%ymm1,%ymm9
-  DB  196,99,125,25,200,1                 ; vextractf128  $0x1,%ymm9,%xmm0
-  DB  196,195,121,34,68,186,20,1          ; vpinsrd       $0x1,0x14(%r10,%rdi,4),%xmm0,%xmm0
-  DB  196,99,53,24,200,1                  ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm9
-  DB  196,99,125,25,200,1                 ; vextractf128  $0x1,%ymm9,%xmm0
-  DB  196,195,121,34,68,186,16,0          ; vpinsrd       $0x0,0x10(%r10,%rdi,4),%xmm0,%xmm0
-  DB  196,99,53,24,200,1                  ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm9
-  DB  196,195,49,34,68,186,12,3           ; vpinsrd       $0x3,0xc(%r10,%rdi,4),%xmm9,%xmm0
-  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
-  DB  196,195,49,34,68,186,8,2            ; vpinsrd       $0x2,0x8(%r10,%rdi,4),%xmm9,%xmm0
-  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
-  DB  196,195,49,34,68,186,4,1            ; vpinsrd       $0x1,0x4(%r10,%rdi,4),%xmm9,%xmm0
-  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
-  DB  196,195,49,34,4,186,0               ; vpinsrd       $0x0,(%r10,%rdi,4),%xmm9,%xmm0
-  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
-  DB  233,210,254,255,255                 ; jmpq          4338 <_sk_load_8888_avx+0x14>
-  DB  102,144                             ; xchg          %ax,%ax
-  DB  236                                 ; in            (%dx),%al
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  222,255                             ; fdivrp        %st,%st(7)
-  DB  255                                 ; (bad)
-  DB  255,208                             ; callq         *%rax
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,194                             ; inc           %edx
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,174,255,255,255,154             ; ljmp          *-0x65000001(%rsi)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  126,255                             ; jle           4481 <_sk_load_8888_avx+0x15d>
-  DB  255                                 ; (bad)
-  DB  255                                 ; .byte         0xff
+  DB  185,8,0,0,0                         ; mov           $0x8,%ecx
+  DB  68,41,193                           ; sub           %r8d,%ecx
+  DB  192,225,3                           ; shl           $0x3,%cl
+  DB  72,199,192,255,255,255,255          ; mov           $0xffffffffffffffff,%rax
+  DB  72,211,232                          ; shr           %cl,%rax
+  DB  196,225,249,110,192                 ; vmovq         %rax,%xmm0
+  DB  196,226,121,48,192                  ; vpmovzxbw     %xmm0,%xmm0
+  DB  196,226,121,0,13,67,40,0,0          ; vpshufb       0x2843(%rip),%xmm0,%xmm1        # 6b10 <_sk_callback_avx+0x5b0>
+  DB  196,226,121,33,201                  ; vpmovsxbd     %xmm1,%xmm1
+  DB  196,226,121,0,5,69,40,0,0           ; vpshufb       0x2845(%rip),%xmm0,%xmm0        # 6b20 <_sk_callback_avx+0x5c0>
+  DB  196,226,121,33,192                  ; vpmovsxbd     %xmm0,%xmm0
+  DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
+  DB  196,194,125,44,25                   ; vmaskmovps    (%r9),%ymm0,%ymm3
+  DB  233,47,255,255,255                  ; jmpq          421f <_sk_load_8888_avx+0x1f>
 
 PUBLIC _sk_gather_8888_avx
 _sk_gather_8888_avx LABEL PROC
@@ -9268,10 +9151,10 @@
   DB  196,131,121,34,4,152,2              ; vpinsrd       $0x2,(%r8,%r11,4),%xmm0,%xmm0
   DB  196,131,121,34,28,144,3             ; vpinsrd       $0x3,(%r8,%r10,4),%xmm0,%xmm3
   DB  196,227,61,24,195,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  DB  197,124,40,21,202,39,0,0            ; vmovaps       0x27ca(%rip),%ymm10        # 6d00 <_sk_callback_avx+0x5d8>
+  DB  197,124,40,21,62,40,0,0             ; vmovaps       0x283e(%rip),%ymm10        # 6be0 <_sk_callback_avx+0x680>
   DB  196,193,124,84,194                  ; vandps        %ymm10,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,248,37,0,0         ; vbroadcastss  0x25f8(%rip),%ymm9        # 6b40 <_sk_callback_avx+0x418>
+  DB  196,98,125,24,13,196,37,0,0         ; vbroadcastss  0x25c4(%rip),%ymm9        # 6978 <_sk_callback_avx+0x418>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  196,193,113,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm1
   DB  197,233,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm2
@@ -9299,9 +9182,12 @@
 
 PUBLIC _sk_store_8888_avx
 _sk_store_8888_avx LABEL PROC
+  DB  80                                  ; push          %rax
+  DB  73,137,200                          ; mov           %rcx,%r8
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,134,37,0,0          ; vbroadcastss  0x2586(%rip),%ymm8        # 6b44 <_sk_callback_avx+0x41c>
+  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
+  DB  76,3,8                              ; add           (%rax),%r9
+  DB  196,98,125,24,5,70,37,0,0           ; vbroadcastss  0x2546(%rip),%ymm8        # 697c <_sk_callback_avx+0x41c>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,65,116,89,208                   ; vmulps        %ymm8,%ymm1,%ymm10
@@ -9325,56 +9211,27 @@
   DB  196,67,37,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm11,%ymm8
   DB  196,65,45,86,192                    ; vorpd         %ymm8,%ymm10,%ymm8
   DB  196,65,53,86,192                    ; vorpd         %ymm8,%ymm9,%ymm8
-  DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,10                              ; jne           464c <_sk_store_8888_avx+0x9c>
-  DB  196,65,124,17,4,186                 ; vmovups       %ymm8,(%r10,%rdi,4)
+  DB  77,133,192                          ; test          %r8,%r8
+  DB  117,14                              ; jne           44c8 <_sk_store_8888_avx+0xac>
+  DB  196,65,124,17,1                     ; vmovups       %ymm8,(%r9)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  76,137,193                          ; mov           %r8,%rcx
+  DB  65,88                               ; pop           %r8
   DB  255,224                             ; jmpq          *%rax
-  DB  65,137,200                          ; mov           %ecx,%r8d
-  DB  65,128,224,7                        ; and           $0x7,%r8b
-  DB  65,254,200                          ; dec           %r8b
-  DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,236                             ; ja            4648 <_sk_store_8888_avx+0x98>
-  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,85,0,0,0                  ; lea           0x55(%rip),%r9        # 46bc <_sk_store_8888_avx+0x10c>
-  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
-  DB  76,1,200                            ; add           %r9,%rax
-  DB  255,224                             ; jmpq          *%rax
-  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
-  DB  196,67,121,22,76,186,24,2           ; vpextrd       $0x2,%xmm9,0x18(%r10,%rdi,4)
-  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
-  DB  196,67,121,22,76,186,20,1           ; vpextrd       $0x1,%xmm9,0x14(%r10,%rdi,4)
-  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
-  DB  196,65,122,17,76,186,16             ; vmovss        %xmm9,0x10(%r10,%rdi,4)
-  DB  196,67,121,22,68,186,12,3           ; vpextrd       $0x3,%xmm8,0xc(%r10,%rdi,4)
-  DB  196,67,121,22,68,186,8,2            ; vpextrd       $0x2,%xmm8,0x8(%r10,%rdi,4)
-  DB  196,67,121,22,68,186,4,1            ; vpextrd       $0x1,%xmm8,0x4(%r10,%rdi,4)
-  DB  196,65,121,126,4,186                ; vmovd         %xmm8,(%r10,%rdi,4)
-  DB  235,143                             ; jmp           4648 <_sk_store_8888_avx+0x98>
-  DB  15,31,0                             ; nopl          (%rax)
-  DB  245                                 ; cmc
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  237                                 ; in            (%dx),%eax
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,229                             ; jmpq          *%rbp
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  221,255                             ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,208                             ; callq         *%rax
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255,194                             ; inc           %edx
-  DB  255                                 ; (bad)
-  DB  255                                 ; (bad)
-  DB  255                                 ; .byte         0xff
-  DB  180,255                             ; mov           $0xff,%ah
-  DB  255                                 ; (bad)
-  DB  255                                 ; .byte         0xff
+  DB  185,8,0,0,0                         ; mov           $0x8,%ecx
+  DB  68,41,193                           ; sub           %r8d,%ecx
+  DB  192,225,3                           ; shl           $0x3,%cl
+  DB  72,199,192,255,255,255,255          ; mov           $0xffffffffffffffff,%rax
+  DB  72,211,232                          ; shr           %cl,%rax
+  DB  196,97,249,110,200                  ; vmovq         %rax,%xmm9
+  DB  196,66,121,48,201                   ; vpmovzxbw     %xmm9,%xmm9
+  DB  196,98,49,0,21,64,38,0,0            ; vpshufb       0x2640(%rip),%xmm9,%xmm10        # 6b30 <_sk_callback_avx+0x5d0>
+  DB  196,66,121,33,210                   ; vpmovsxbd     %xmm10,%xmm10
+  DB  196,98,49,0,13,66,38,0,0            ; vpshufb       0x2642(%rip),%xmm9,%xmm9        # 6b40 <_sk_callback_avx+0x5e0>
+  DB  196,66,121,33,201                   ; vpmovsxbd     %xmm9,%xmm9
+  DB  196,67,45,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
+  DB  196,66,53,46,1                      ; vmaskmovps    %ymm8,%ymm9,(%r9)
+  DB  235,175                             ; jmp           44bf <_sk_store_8888_avx+0xa3>
 
 PUBLIC _sk_load_f16_avx
 _sk_load_f16_avx LABEL PROC
@@ -9386,7 +9243,7 @@
   DB  197,252,17,116,36,64                ; vmovups       %ymm6,0x40(%rsp)
   DB  197,252,17,108,36,32                ; vmovups       %ymm5,0x20(%rsp)
   DB  197,254,127,36,36                   ; vmovdqu       %ymm4,(%rsp)
-  DB  15,133,143,2,0,0                    ; jne           4993 <_sk_load_f16_avx+0x2bb>
+  DB  15,133,143,2,0,0                    ; jne           47cb <_sk_load_f16_avx+0x2bb>
   DB  197,121,16,4,248                    ; vmovupd       (%rax,%rdi,8),%xmm8
   DB  197,249,16,84,248,16                ; vmovupd       0x10(%rax,%rdi,8),%xmm2
   DB  197,249,16,76,248,32                ; vmovupd       0x20(%rax,%rdi,8),%xmm1
@@ -9404,13 +9261,13 @@
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
-  DB  196,98,125,24,37,235,35,0,0         ; vbroadcastss  0x23eb(%rip),%ymm12        # 6b48 <_sk_callback_avx+0x420>
+  DB  196,98,125,24,37,235,35,0,0         ; vbroadcastss  0x23eb(%rip),%ymm12        # 6980 <_sk_callback_avx+0x420>
   DB  196,193,124,84,204                  ; vandps        %ymm12,%ymm0,%ymm1
   DB  197,252,87,193                      ; vxorps        %ymm1,%ymm0,%ymm0
   DB  196,195,125,25,198,1                ; vextractf128  $0x1,%ymm0,%xmm14
-  DB  196,98,121,24,29,215,35,0,0         ; vbroadcastss  0x23d7(%rip),%xmm11        # 6b4c <_sk_callback_avx+0x424>
+  DB  196,98,121,24,29,215,35,0,0         ; vbroadcastss  0x23d7(%rip),%xmm11        # 6984 <_sk_callback_avx+0x424>
   DB  196,193,8,87,219                    ; vxorps        %xmm11,%xmm14,%xmm3
-  DB  196,98,121,24,45,205,35,0,0         ; vbroadcastss  0x23cd(%rip),%xmm13        # 6b50 <_sk_callback_avx+0x428>
+  DB  196,98,121,24,45,205,35,0,0         ; vbroadcastss  0x23cd(%rip),%xmm13        # 6988 <_sk_callback_avx+0x428>
   DB  197,145,102,219                     ; vpcmpgtd      %xmm3,%xmm13,%xmm3
   DB  196,65,120,87,211                   ; vxorps        %xmm11,%xmm0,%xmm10
   DB  196,65,17,102,210                   ; vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -9424,7 +9281,7 @@
   DB  196,227,125,24,195,1                ; vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   DB  197,252,86,193                      ; vorps         %ymm1,%ymm0,%ymm0
   DB  196,227,125,25,193,1                ; vextractf128  $0x1,%ymm0,%xmm1
-  DB  196,226,121,24,29,131,35,0,0        ; vbroadcastss  0x2383(%rip),%xmm3        # 6b54 <_sk_callback_avx+0x42c>
+  DB  196,226,121,24,29,131,35,0,0        ; vbroadcastss  0x2383(%rip),%xmm3        # 698c <_sk_callback_avx+0x42c>
   DB  197,241,254,203                     ; vpaddd        %xmm3,%xmm1,%xmm1
   DB  197,249,254,195                     ; vpaddd        %xmm3,%xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
@@ -9517,29 +9374,29 @@
   DB  197,123,16,4,248                    ; vmovsd        (%rax,%rdi,8),%xmm8
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,79                              ; je            49f2 <_sk_load_f16_avx+0x31a>
+  DB  116,79                              ; je            482a <_sk_load_f16_avx+0x31a>
   DB  197,57,22,68,248,8                  ; vmovhpd       0x8(%rax,%rdi,8),%xmm8,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,67                              ; jb            49f2 <_sk_load_f16_avx+0x31a>
+  DB  114,67                              ; jb            482a <_sk_load_f16_avx+0x31a>
   DB  197,251,16,84,248,16                ; vmovsd        0x10(%rax,%rdi,8),%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  116,68                              ; je            49ff <_sk_load_f16_avx+0x327>
+  DB  116,68                              ; je            4837 <_sk_load_f16_avx+0x327>
   DB  197,233,22,84,248,24                ; vmovhpd       0x18(%rax,%rdi,8),%xmm2,%xmm2
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,56                              ; jb            49ff <_sk_load_f16_avx+0x327>
+  DB  114,56                              ; jb            4837 <_sk_load_f16_avx+0x327>
   DB  197,251,16,76,248,32                ; vmovsd        0x20(%rax,%rdi,8),%xmm1
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  15,132,68,253,255,255               ; je            471b <_sk_load_f16_avx+0x43>
+  DB  15,132,68,253,255,255               ; je            4553 <_sk_load_f16_avx+0x43>
   DB  197,241,22,76,248,40                ; vmovhpd       0x28(%rax,%rdi,8),%xmm1,%xmm1
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  15,130,52,253,255,255               ; jb            471b <_sk_load_f16_avx+0x43>
+  DB  15,130,52,253,255,255               ; jb            4553 <_sk_load_f16_avx+0x43>
   DB  197,122,126,76,248,48               ; vmovq         0x30(%rax,%rdi,8),%xmm9
-  DB  233,41,253,255,255                  ; jmpq          471b <_sk_load_f16_avx+0x43>
+  DB  233,41,253,255,255                  ; jmpq          4553 <_sk_load_f16_avx+0x43>
   DB  197,241,87,201                      ; vxorpd        %xmm1,%xmm1,%xmm1
   DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
-  DB  233,28,253,255,255                  ; jmpq          471b <_sk_load_f16_avx+0x43>
+  DB  233,28,253,255,255                  ; jmpq          4553 <_sk_load_f16_avx+0x43>
   DB  197,241,87,201                      ; vxorpd        %xmm1,%xmm1,%xmm1
-  DB  233,19,253,255,255                  ; jmpq          471b <_sk_load_f16_avx+0x43>
+  DB  233,19,253,255,255                  ; jmpq          4553 <_sk_load_f16_avx+0x43>
 
 PUBLIC _sk_gather_f16_avx
 _sk_gather_f16_avx LABEL PROC
@@ -9601,13 +9458,13 @@
   DB  197,249,105,210                     ; vpunpckhwd    %xmm2,%xmm0,%xmm2
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,194,1                ; vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
-  DB  196,98,125,24,37,67,32,0,0          ; vbroadcastss  0x2043(%rip),%ymm12        # 6b58 <_sk_callback_avx+0x430>
+  DB  196,98,125,24,37,67,32,0,0          ; vbroadcastss  0x2043(%rip),%ymm12        # 6990 <_sk_callback_avx+0x430>
   DB  196,193,124,84,212                  ; vandps        %ymm12,%ymm0,%ymm2
   DB  197,252,87,194                      ; vxorps        %ymm2,%ymm0,%ymm0
   DB  196,195,125,25,198,1                ; vextractf128  $0x1,%ymm0,%xmm14
-  DB  196,98,121,24,29,47,32,0,0          ; vbroadcastss  0x202f(%rip),%xmm11        # 6b5c <_sk_callback_avx+0x434>
+  DB  196,98,121,24,29,47,32,0,0          ; vbroadcastss  0x202f(%rip),%xmm11        # 6994 <_sk_callback_avx+0x434>
   DB  196,193,8,87,219                    ; vxorps        %xmm11,%xmm14,%xmm3
-  DB  196,98,121,24,45,37,32,0,0          ; vbroadcastss  0x2025(%rip),%xmm13        # 6b60 <_sk_callback_avx+0x438>
+  DB  196,98,121,24,45,37,32,0,0          ; vbroadcastss  0x2025(%rip),%xmm13        # 6998 <_sk_callback_avx+0x438>
   DB  197,145,102,219                     ; vpcmpgtd      %xmm3,%xmm13,%xmm3
   DB  196,65,120,87,211                   ; vxorps        %xmm11,%xmm0,%xmm10
   DB  196,65,17,102,210                   ; vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -9621,7 +9478,7 @@
   DB  196,227,125,24,195,1                ; vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   DB  197,252,86,194                      ; vorps         %ymm2,%ymm0,%ymm0
   DB  196,227,125,25,194,1                ; vextractf128  $0x1,%ymm0,%xmm2
-  DB  196,226,121,24,29,219,31,0,0        ; vbroadcastss  0x1fdb(%rip),%xmm3        # 6b64 <_sk_callback_avx+0x43c>
+  DB  196,226,121,24,29,219,31,0,0        ; vbroadcastss  0x1fdb(%rip),%xmm3        # 699c <_sk_callback_avx+0x43c>
   DB  197,233,254,211                     ; vpaddd        %xmm3,%xmm2,%xmm2
   DB  197,249,254,195                     ; vpaddd        %xmm3,%xmm0,%xmm0
   DB  196,227,125,24,194,1                ; vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
@@ -9723,12 +9580,12 @@
   DB  197,252,17,180,36,128,0,0,0         ; vmovups       %ymm6,0x80(%rsp)
   DB  197,252,17,108,36,96                ; vmovups       %ymm5,0x60(%rsp)
   DB  197,252,17,100,36,64                ; vmovups       %ymm4,0x40(%rsp)
-  DB  196,98,125,24,13,232,29,0,0         ; vbroadcastss  0x1de8(%rip),%ymm9        # 6b68 <_sk_callback_avx+0x440>
+  DB  196,98,125,24,13,232,29,0,0         ; vbroadcastss  0x1de8(%rip),%ymm9        # 69a0 <_sk_callback_avx+0x440>
   DB  196,65,124,84,209                   ; vandps        %ymm9,%ymm0,%ymm10
   DB  197,252,17,4,36                     ; vmovups       %ymm0,(%rsp)
   DB  196,65,124,87,218                   ; vxorps        %ymm10,%ymm0,%ymm11
   DB  196,67,125,25,220,1                 ; vextractf128  $0x1,%ymm11,%xmm12
-  DB  196,98,121,24,5,206,29,0,0          ; vbroadcastss  0x1dce(%rip),%xmm8        # 6b6c <_sk_callback_avx+0x444>
+  DB  196,98,121,24,5,206,29,0,0          ; vbroadcastss  0x1dce(%rip),%xmm8        # 69a4 <_sk_callback_avx+0x444>
   DB  196,65,57,102,236                   ; vpcmpgtd      %xmm12,%xmm8,%xmm13
   DB  196,65,57,102,243                   ; vpcmpgtd      %xmm11,%xmm8,%xmm14
   DB  196,67,13,24,237,1                  ; vinsertf128   $0x1,%xmm13,%ymm14,%ymm13
@@ -9738,7 +9595,7 @@
   DB  196,67,13,24,242,1                  ; vinsertf128   $0x1,%xmm10,%ymm14,%ymm14
   DB  196,193,33,114,211,13               ; vpsrld        $0xd,%xmm11,%xmm11
   DB  196,193,25,114,212,13               ; vpsrld        $0xd,%xmm12,%xmm12
-  DB  196,98,125,24,21,149,29,0,0         ; vbroadcastss  0x1d95(%rip),%ymm10        # 6b70 <_sk_callback_avx+0x448>
+  DB  196,98,125,24,21,149,29,0,0         ; vbroadcastss  0x1d95(%rip),%ymm10        # 69a8 <_sk_callback_avx+0x448>
   DB  196,65,12,86,242                    ; vorps         %ymm10,%ymm14,%ymm14
   DB  196,67,125,25,247,1                 ; vextractf128  $0x1,%ymm14,%xmm15
   DB  196,65,1,254,228                    ; vpaddd        %xmm12,%xmm15,%xmm12
@@ -9820,7 +9677,7 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,75                              ; jne           4fc2 <_sk_store_f16_avx+0x270>
+  DB  117,75                              ; jne           4dfa <_sk_store_f16_avx+0x270>
   DB  197,120,17,28,248                   ; vmovups       %xmm11,(%rax,%rdi,8)
   DB  197,120,17,84,248,16                ; vmovups       %xmm10,0x10(%rax,%rdi,8)
   DB  197,120,17,76,248,32                ; vmovups       %xmm9,0x20(%rax,%rdi,8)
@@ -9836,22 +9693,22 @@
   DB  255,224                             ; jmpq          *%rax
   DB  197,121,214,28,248                  ; vmovq         %xmm11,(%rax,%rdi,8)
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,193                             ; je            4f8e <_sk_store_f16_avx+0x23c>
+  DB  116,193                             ; je            4dc6 <_sk_store_f16_avx+0x23c>
   DB  197,121,23,92,248,8                 ; vmovhpd       %xmm11,0x8(%rax,%rdi,8)
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,181                             ; jb            4f8e <_sk_store_f16_avx+0x23c>
+  DB  114,181                             ; jb            4dc6 <_sk_store_f16_avx+0x23c>
   DB  197,121,214,84,248,16               ; vmovq         %xmm10,0x10(%rax,%rdi,8)
-  DB  116,173                             ; je            4f8e <_sk_store_f16_avx+0x23c>
+  DB  116,173                             ; je            4dc6 <_sk_store_f16_avx+0x23c>
   DB  197,121,23,84,248,24                ; vmovhpd       %xmm10,0x18(%rax,%rdi,8)
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,161                             ; jb            4f8e <_sk_store_f16_avx+0x23c>
+  DB  114,161                             ; jb            4dc6 <_sk_store_f16_avx+0x23c>
   DB  197,121,214,76,248,32               ; vmovq         %xmm9,0x20(%rax,%rdi,8)
-  DB  116,153                             ; je            4f8e <_sk_store_f16_avx+0x23c>
+  DB  116,153                             ; je            4dc6 <_sk_store_f16_avx+0x23c>
   DB  197,121,23,76,248,40                ; vmovhpd       %xmm9,0x28(%rax,%rdi,8)
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,141                             ; jb            4f8e <_sk_store_f16_avx+0x23c>
+  DB  114,141                             ; jb            4dc6 <_sk_store_f16_avx+0x23c>
   DB  197,121,214,68,248,48               ; vmovq         %xmm8,0x30(%rax,%rdi,8)
-  DB  235,133                             ; jmp           4f8e <_sk_store_f16_avx+0x23c>
+  DB  235,133                             ; jmp           4dc6 <_sk_store_f16_avx+0x23c>
 
 PUBLIC _sk_load_u16_be_avx
 _sk_load_u16_be_avx LABEL PROC
@@ -9859,7 +9716,7 @@
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,141,4,189,0,0,0,0                ; lea           0x0(,%rdi,4),%rax
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,253,0,0,0                    ; jne           511c <_sk_load_u16_be_avx+0x113>
+  DB  15,133,253,0,0,0                    ; jne           4f54 <_sk_load_u16_be_avx+0x113>
   DB  196,65,121,16,4,64                  ; vmovupd       (%r8,%rax,2),%xmm8
   DB  196,193,121,16,84,64,16             ; vmovupd       0x10(%r8,%rax,2),%xmm2
   DB  196,193,121,16,92,64,32             ; vmovupd       0x20(%r8,%rax,2),%xmm3
@@ -9881,7 +9738,7 @@
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,29,228,26,0,0         ; vbroadcastss  0x1ae4(%rip),%ymm11        # 6b74 <_sk_callback_avx+0x44c>
+  DB  196,98,125,24,29,228,26,0,0         ; vbroadcastss  0x1ae4(%rip),%ymm11        # 69ac <_sk_callback_avx+0x44c>
   DB  196,193,124,89,195                  ; vmulps        %ymm11,%ymm0,%ymm0
   DB  197,177,109,202                     ; vpunpckhqdq   %xmm2,%xmm9,%xmm1
   DB  197,233,113,241,8                   ; vpsllw        $0x8,%xmm1,%xmm2
@@ -9915,29 +9772,29 @@
   DB  196,65,123,16,4,64                  ; vmovsd        (%r8,%rax,2),%xmm8
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,85                              ; je            5182 <_sk_load_u16_be_avx+0x179>
+  DB  116,85                              ; je            4fba <_sk_load_u16_be_avx+0x179>
   DB  196,65,57,22,68,64,8                ; vmovhpd       0x8(%r8,%rax,2),%xmm8,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,72                              ; jb            5182 <_sk_load_u16_be_avx+0x179>
+  DB  114,72                              ; jb            4fba <_sk_load_u16_be_avx+0x179>
   DB  196,193,123,16,84,64,16             ; vmovsd        0x10(%r8,%rax,2),%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  116,72                              ; je            518f <_sk_load_u16_be_avx+0x186>
+  DB  116,72                              ; je            4fc7 <_sk_load_u16_be_avx+0x186>
   DB  196,193,105,22,84,64,24             ; vmovhpd       0x18(%r8,%rax,2),%xmm2,%xmm2
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,59                              ; jb            518f <_sk_load_u16_be_avx+0x186>
+  DB  114,59                              ; jb            4fc7 <_sk_load_u16_be_avx+0x186>
   DB  196,193,123,16,92,64,32             ; vmovsd        0x20(%r8,%rax,2),%xmm3
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  15,132,213,254,255,255              ; je            503a <_sk_load_u16_be_avx+0x31>
+  DB  15,132,213,254,255,255              ; je            4e72 <_sk_load_u16_be_avx+0x31>
   DB  196,193,97,22,92,64,40              ; vmovhpd       0x28(%r8,%rax,2),%xmm3,%xmm3
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  15,130,196,254,255,255              ; jb            503a <_sk_load_u16_be_avx+0x31>
+  DB  15,130,196,254,255,255              ; jb            4e72 <_sk_load_u16_be_avx+0x31>
   DB  196,65,122,126,76,64,48             ; vmovq         0x30(%r8,%rax,2),%xmm9
-  DB  233,184,254,255,255                 ; jmpq          503a <_sk_load_u16_be_avx+0x31>
+  DB  233,184,254,255,255                 ; jmpq          4e72 <_sk_load_u16_be_avx+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
   DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
-  DB  233,171,254,255,255                 ; jmpq          503a <_sk_load_u16_be_avx+0x31>
+  DB  233,171,254,255,255                 ; jmpq          4e72 <_sk_load_u16_be_avx+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
-  DB  233,162,254,255,255                 ; jmpq          503a <_sk_load_u16_be_avx+0x31>
+  DB  233,162,254,255,255                 ; jmpq          4e72 <_sk_load_u16_be_avx+0x31>
 
 PUBLIC _sk_load_rgb_u16_be_avx
 _sk_load_rgb_u16_be_avx LABEL PROC
@@ -9945,7 +9802,7 @@
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,141,4,127                        ; lea           (%rdi,%rdi,2),%rax
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,243,0,0,0                    ; jne           529d <_sk_load_rgb_u16_be_avx+0x105>
+  DB  15,133,243,0,0,0                    ; jne           50d5 <_sk_load_rgb_u16_be_avx+0x105>
   DB  196,193,122,111,4,64                ; vmovdqu       (%r8,%rax,2),%xmm0
   DB  196,193,122,111,84,64,12            ; vmovdqu       0xc(%r8,%rax,2),%xmm2
   DB  196,193,122,111,76,64,24            ; vmovdqu       0x18(%r8,%rax,2),%xmm1
@@ -9972,7 +9829,7 @@
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,29,68,25,0,0          ; vbroadcastss  0x1944(%rip),%ymm11        # 6b78 <_sk_callback_avx+0x450>
+  DB  196,98,125,24,29,68,25,0,0          ; vbroadcastss  0x1944(%rip),%ymm11        # 69b0 <_sk_callback_avx+0x450>
   DB  196,193,124,89,195                  ; vmulps        %ymm11,%ymm0,%ymm0
   DB  197,185,109,202                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm1
   DB  197,233,113,241,8                   ; vpsllw        $0x8,%xmm1,%xmm2
@@ -9993,48 +9850,48 @@
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  196,193,108,89,211                  ; vmulps        %ymm11,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,225,24,0,0        ; vbroadcastss  0x18e1(%rip),%ymm3        # 6b7c <_sk_callback_avx+0x454>
+  DB  196,226,125,24,29,225,24,0,0        ; vbroadcastss  0x18e1(%rip),%ymm3        # 69b4 <_sk_callback_avx+0x454>
   DB  255,224                             ; jmpq          *%rax
   DB  196,193,121,110,4,64                ; vmovd         (%r8,%rax,2),%xmm0
   DB  196,193,121,196,68,64,4,2           ; vpinsrw       $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  117,5                               ; jne           52b6 <_sk_load_rgb_u16_be_avx+0x11e>
-  DB  233,40,255,255,255                  ; jmpq          51de <_sk_load_rgb_u16_be_avx+0x46>
+  DB  117,5                               ; jne           50ee <_sk_load_rgb_u16_be_avx+0x11e>
+  DB  233,40,255,255,255                  ; jmpq          5016 <_sk_load_rgb_u16_be_avx+0x46>
   DB  196,193,121,110,76,64,6             ; vmovd         0x6(%r8,%rax,2),%xmm1
   DB  196,65,113,196,68,64,10,2           ; vpinsrw       $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,26                              ; jb            52e5 <_sk_load_rgb_u16_be_avx+0x14d>
+  DB  114,26                              ; jb            511d <_sk_load_rgb_u16_be_avx+0x14d>
   DB  196,193,121,110,76,64,12            ; vmovd         0xc(%r8,%rax,2),%xmm1
   DB  196,193,113,196,84,64,16,2          ; vpinsrw       $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  117,10                              ; jne           52ea <_sk_load_rgb_u16_be_avx+0x152>
-  DB  233,249,254,255,255                 ; jmpq          51de <_sk_load_rgb_u16_be_avx+0x46>
-  DB  233,244,254,255,255                 ; jmpq          51de <_sk_load_rgb_u16_be_avx+0x46>
+  DB  117,10                              ; jne           5122 <_sk_load_rgb_u16_be_avx+0x152>
+  DB  233,249,254,255,255                 ; jmpq          5016 <_sk_load_rgb_u16_be_avx+0x46>
+  DB  233,244,254,255,255                 ; jmpq          5016 <_sk_load_rgb_u16_be_avx+0x46>
   DB  196,193,121,110,76,64,18            ; vmovd         0x12(%r8,%rax,2),%xmm1
   DB  196,65,113,196,76,64,22,2           ; vpinsrw       $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,26                              ; jb            5319 <_sk_load_rgb_u16_be_avx+0x181>
+  DB  114,26                              ; jb            5151 <_sk_load_rgb_u16_be_avx+0x181>
   DB  196,193,121,110,76,64,24            ; vmovd         0x18(%r8,%rax,2),%xmm1
   DB  196,193,113,196,76,64,28,2          ; vpinsrw       $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  117,10                              ; jne           531e <_sk_load_rgb_u16_be_avx+0x186>
-  DB  233,197,254,255,255                 ; jmpq          51de <_sk_load_rgb_u16_be_avx+0x46>
-  DB  233,192,254,255,255                 ; jmpq          51de <_sk_load_rgb_u16_be_avx+0x46>
+  DB  117,10                              ; jne           5156 <_sk_load_rgb_u16_be_avx+0x186>
+  DB  233,197,254,255,255                 ; jmpq          5016 <_sk_load_rgb_u16_be_avx+0x46>
+  DB  233,192,254,255,255                 ; jmpq          5016 <_sk_load_rgb_u16_be_avx+0x46>
   DB  196,193,121,110,92,64,30            ; vmovd         0x1e(%r8,%rax,2),%xmm3
   DB  196,65,97,196,92,64,34,2            ; vpinsrw       $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,20                              ; jb            5347 <_sk_load_rgb_u16_be_avx+0x1af>
+  DB  114,20                              ; jb            517f <_sk_load_rgb_u16_be_avx+0x1af>
   DB  196,193,121,110,92,64,36            ; vmovd         0x24(%r8,%rax,2),%xmm3
   DB  196,193,97,196,92,64,40,2           ; vpinsrw       $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
-  DB  233,151,254,255,255                 ; jmpq          51de <_sk_load_rgb_u16_be_avx+0x46>
-  DB  233,146,254,255,255                 ; jmpq          51de <_sk_load_rgb_u16_be_avx+0x46>
+  DB  233,151,254,255,255                 ; jmpq          5016 <_sk_load_rgb_u16_be_avx+0x46>
+  DB  233,146,254,255,255                 ; jmpq          5016 <_sk_load_rgb_u16_be_avx+0x46>
 
 PUBLIC _sk_store_u16_be_avx
 _sk_store_u16_be_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,141,4,189,0,0,0,0                ; lea           0x0(,%rdi,4),%rax
-  DB  196,98,125,24,5,30,24,0,0           ; vbroadcastss  0x181e(%rip),%ymm8        # 6b80 <_sk_callback_avx+0x458>
+  DB  196,98,125,24,5,30,24,0,0           ; vbroadcastss  0x181e(%rip),%ymm8        # 69b8 <_sk_callback_avx+0x458>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,67,125,25,202,1                 ; vextractf128  $0x1,%ymm9,%xmm10
@@ -10072,7 +9929,7 @@
   DB  196,65,17,98,200                    ; vpunpckldq    %xmm8,%xmm13,%xmm9
   DB  196,65,17,106,192                   ; vpunpckhdq    %xmm8,%xmm13,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,31                              ; jne           5446 <_sk_store_u16_be_avx+0xfa>
+  DB  117,31                              ; jne           527e <_sk_store_u16_be_avx+0xfa>
   DB  196,65,120,17,28,64                 ; vmovups       %xmm11,(%r8,%rax,2)
   DB  196,65,120,17,84,64,16              ; vmovups       %xmm10,0x10(%r8,%rax,2)
   DB  196,65,120,17,76,64,32              ; vmovups       %xmm9,0x20(%r8,%rax,2)
@@ -10081,31 +9938,31 @@
   DB  255,224                             ; jmpq          *%rax
   DB  196,65,121,214,28,64                ; vmovq         %xmm11,(%r8,%rax,2)
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,240                             ; je            5442 <_sk_store_u16_be_avx+0xf6>
+  DB  116,240                             ; je            527a <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,23,92,64,8               ; vmovhpd       %xmm11,0x8(%r8,%rax,2)
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,227                             ; jb            5442 <_sk_store_u16_be_avx+0xf6>
+  DB  114,227                             ; jb            527a <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,214,84,64,16             ; vmovq         %xmm10,0x10(%r8,%rax,2)
-  DB  116,218                             ; je            5442 <_sk_store_u16_be_avx+0xf6>
+  DB  116,218                             ; je            527a <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,23,84,64,24              ; vmovhpd       %xmm10,0x18(%r8,%rax,2)
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,205                             ; jb            5442 <_sk_store_u16_be_avx+0xf6>
+  DB  114,205                             ; jb            527a <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,214,76,64,32             ; vmovq         %xmm9,0x20(%r8,%rax,2)
-  DB  116,196                             ; je            5442 <_sk_store_u16_be_avx+0xf6>
+  DB  116,196                             ; je            527a <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,23,76,64,40              ; vmovhpd       %xmm9,0x28(%r8,%rax,2)
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,183                             ; jb            5442 <_sk_store_u16_be_avx+0xf6>
+  DB  114,183                             ; jb            527a <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,214,68,64,48             ; vmovq         %xmm8,0x30(%r8,%rax,2)
-  DB  235,174                             ; jmp           5442 <_sk_store_u16_be_avx+0xf6>
+  DB  235,174                             ; jmp           527a <_sk_store_u16_be_avx+0xf6>
 
 PUBLIC _sk_load_f32_avx
 _sk_load_f32_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  119,110                             ; ja            550a <_sk_load_f32_avx+0x76>
+  DB  119,110                             ; ja            5342 <_sk_load_f32_avx+0x76>
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
-  DB  76,141,21,134,0,0,0                 ; lea           0x86(%rip),%r10        # 5534 <_sk_load_f32_avx+0xa0>
+  DB  76,141,21,134,0,0,0                 ; lea           0x86(%rip),%r10        # 536c <_sk_load_f32_avx+0xa0>
   DB  73,99,4,138                         ; movslq        (%r10,%rcx,4),%rax
   DB  76,1,208                            ; add           %r10,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -10162,7 +10019,7 @@
   DB  196,65,37,20,196                    ; vunpcklpd     %ymm12,%ymm11,%ymm8
   DB  196,65,37,21,220                    ; vunpckhpd     %ymm12,%ymm11,%ymm11
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,55                              ; jne           55c1 <_sk_store_f32_avx+0x6d>
+  DB  117,55                              ; jne           53f9 <_sk_store_f32_avx+0x6d>
   DB  196,67,45,24,225,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm12
   DB  196,67,61,24,235,1                  ; vinsertf128   $0x1,%xmm11,%ymm8,%ymm13
   DB  196,67,45,6,201,49                  ; vperm2f128    $0x31,%ymm9,%ymm10,%ymm9
@@ -10175,22 +10032,22 @@
   DB  255,224                             ; jmpq          *%rax
   DB  196,65,121,17,20,128                ; vmovupd       %xmm10,(%r8,%rax,4)
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,240                             ; je            55bd <_sk_store_f32_avx+0x69>
+  DB  116,240                             ; je            53f5 <_sk_store_f32_avx+0x69>
   DB  196,65,121,17,76,128,16             ; vmovupd       %xmm9,0x10(%r8,%rax,4)
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,227                             ; jb            55bd <_sk_store_f32_avx+0x69>
+  DB  114,227                             ; jb            53f5 <_sk_store_f32_avx+0x69>
   DB  196,65,121,17,68,128,32             ; vmovupd       %xmm8,0x20(%r8,%rax,4)
-  DB  116,218                             ; je            55bd <_sk_store_f32_avx+0x69>
+  DB  116,218                             ; je            53f5 <_sk_store_f32_avx+0x69>
   DB  196,65,121,17,92,128,48             ; vmovupd       %xmm11,0x30(%r8,%rax,4)
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,205                             ; jb            55bd <_sk_store_f32_avx+0x69>
+  DB  114,205                             ; jb            53f5 <_sk_store_f32_avx+0x69>
   DB  196,67,125,25,84,128,64,1           ; vextractf128  $0x1,%ymm10,0x40(%r8,%rax,4)
-  DB  116,195                             ; je            55bd <_sk_store_f32_avx+0x69>
+  DB  116,195                             ; je            53f5 <_sk_store_f32_avx+0x69>
   DB  196,67,125,25,76,128,80,1           ; vextractf128  $0x1,%ymm9,0x50(%r8,%rax,4)
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,181                             ; jb            55bd <_sk_store_f32_avx+0x69>
+  DB  114,181                             ; jb            53f5 <_sk_store_f32_avx+0x69>
   DB  196,67,125,25,68,128,96,1           ; vextractf128  $0x1,%ymm8,0x60(%r8,%rax,4)
-  DB  235,171                             ; jmp           55bd <_sk_store_f32_avx+0x69>
+  DB  235,171                             ; jmp           53f5 <_sk_store_f32_avx+0x69>
 
 PUBLIC _sk_clamp_x_avx
 _sk_clamp_x_avx LABEL PROC
@@ -10280,7 +10137,7 @@
 _sk_clamp_x_1_avx LABEL PROC
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  197,188,95,192                      ; vmaxps        %ymm0,%ymm8,%ymm0
-  DB  196,98,125,24,5,78,20,0,0           ; vbroadcastss  0x144e(%rip),%ymm8        # 6b84 <_sk_callback_avx+0x45c>
+  DB  196,98,125,24,5,78,20,0,0           ; vbroadcastss  0x144e(%rip),%ymm8        # 69bc <_sk_callback_avx+0x45c>
   DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -10294,9 +10151,9 @@
 
 PUBLIC _sk_mirror_x_1_avx
 _sk_mirror_x_1_avx LABEL PROC
-  DB  196,98,125,24,5,49,20,0,0           ; vbroadcastss  0x1431(%rip),%ymm8        # 6b88 <_sk_callback_avx+0x460>
+  DB  196,98,125,24,5,49,20,0,0           ; vbroadcastss  0x1431(%rip),%ymm8        # 69c0 <_sk_callback_avx+0x460>
   DB  196,193,124,88,192                  ; vaddps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,13,39,20,0,0          ; vbroadcastss  0x1427(%rip),%ymm9        # 6b8c <_sk_callback_avx+0x464>
+  DB  196,98,125,24,13,39,20,0,0          ; vbroadcastss  0x1427(%rip),%ymm9        # 69c4 <_sk_callback_avx+0x464>
   DB  196,65,124,89,201                   ; vmulps        %ymm9,%ymm0,%ymm9
   DB  196,67,125,8,201,1                  ; vroundps      $0x1,%ymm9,%ymm9
   DB  196,65,52,88,201                    ; vaddps        %ymm9,%ymm9,%ymm9
@@ -10310,12 +10167,12 @@
 
 PUBLIC _sk_luminance_to_alpha_avx
 _sk_luminance_to_alpha_avx LABEL PROC
-  DB  196,226,125,24,29,247,19,0,0        ; vbroadcastss  0x13f7(%rip),%ymm3        # 6b90 <_sk_callback_avx+0x468>
+  DB  196,226,125,24,29,247,19,0,0        ; vbroadcastss  0x13f7(%rip),%ymm3        # 69c8 <_sk_callback_avx+0x468>
   DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
-  DB  196,226,125,24,29,238,19,0,0        ; vbroadcastss  0x13ee(%rip),%ymm3        # 6b94 <_sk_callback_avx+0x46c>
+  DB  196,226,125,24,29,238,19,0,0        ; vbroadcastss  0x13ee(%rip),%ymm3        # 69cc <_sk_callback_avx+0x46c>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
   DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,225,19,0,0        ; vbroadcastss  0x13e1(%rip),%ymm1        # 6b98 <_sk_callback_avx+0x470>
+  DB  196,226,125,24,13,225,19,0,0        ; vbroadcastss  0x13e1(%rip),%ymm1        # 69d0 <_sk_callback_avx+0x470>
   DB  197,236,89,201                      ; vmulps        %ymm1,%ymm2,%ymm1
   DB  197,252,88,217                      ; vaddps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10522,9 +10379,9 @@
   DB  72,139,24                           ; mov           (%rax),%rbx
   DB  72,139,104,8                        ; mov           0x8(%rax),%rbp
   DB  72,255,203                          ; dec           %rbx
-  DB  120,7                               ; js            5b0d <_sk_evenly_spaced_gradient_avx+0x1f>
+  DB  120,7                               ; js            5945 <_sk_evenly_spaced_gradient_avx+0x1f>
   DB  196,225,242,42,203                  ; vcvtsi2ss     %rbx,%xmm1,%xmm1
-  DB  235,21                              ; jmp           5b22 <_sk_evenly_spaced_gradient_avx+0x34>
+  DB  235,21                              ; jmp           595a <_sk_evenly_spaced_gradient_avx+0x34>
   DB  73,137,216                          ; mov           %rbx,%r8
   DB  73,209,232                          ; shr           %r8
   DB  131,227,1                           ; and           $0x1,%ebx
@@ -10679,18 +10536,18 @@
 
 PUBLIC _sk_gauss_a_to_rgba_avx
 _sk_gauss_a_to_rgba_avx LABEL PROC
-  DB  196,226,125,24,5,76,13,0,0          ; vbroadcastss  0xd4c(%rip),%ymm0        # 6b9c <_sk_callback_avx+0x474>
+  DB  196,226,125,24,5,76,13,0,0          ; vbroadcastss  0xd4c(%rip),%ymm0        # 69d4 <_sk_callback_avx+0x474>
   DB  197,228,89,192                      ; vmulps        %ymm0,%ymm3,%ymm0
-  DB  196,226,125,24,13,67,13,0,0         ; vbroadcastss  0xd43(%rip),%ymm1        # 6ba0 <_sk_callback_avx+0x478>
+  DB  196,226,125,24,13,67,13,0,0         ; vbroadcastss  0xd43(%rip),%ymm1        # 69d8 <_sk_callback_avx+0x478>
   DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
   DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
-  DB  196,226,125,24,13,54,13,0,0         ; vbroadcastss  0xd36(%rip),%ymm1        # 6ba4 <_sk_callback_avx+0x47c>
+  DB  196,226,125,24,13,54,13,0,0         ; vbroadcastss  0xd36(%rip),%ymm1        # 69dc <_sk_callback_avx+0x47c>
   DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
   DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
-  DB  196,226,125,24,13,41,13,0,0         ; vbroadcastss  0xd29(%rip),%ymm1        # 6ba8 <_sk_callback_avx+0x480>
+  DB  196,226,125,24,13,41,13,0,0         ; vbroadcastss  0xd29(%rip),%ymm1        # 69e0 <_sk_callback_avx+0x480>
   DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
   DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
-  DB  196,226,125,24,13,28,13,0,0         ; vbroadcastss  0xd1c(%rip),%ymm1        # 6bac <_sk_callback_avx+0x484>
+  DB  196,226,125,24,13,28,13,0,0         ; vbroadcastss  0xd1c(%rip),%ymm1        # 69e4 <_sk_callback_avx+0x484>
   DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
@@ -10710,12 +10567,12 @@
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
   DB  73,131,248,2                        ; cmp           $0x2,%r8
-  DB  114,80                              ; jb            5f0d <_sk_gradient_avx+0x69>
+  DB  114,80                              ; jb            5d45 <_sk_gradient_avx+0x69>
   DB  72,139,88,72                        ; mov           0x48(%rax),%rbx
   DB  73,255,200                          ; dec           %r8
   DB  72,131,195,4                        ; add           $0x4,%rbx
   DB  196,65,52,87,201                    ; vxorps        %ymm9,%ymm9,%ymm9
-  DB  196,98,125,24,21,218,12,0,0         ; vbroadcastss  0xcda(%rip),%ymm10        # 6bb0 <_sk_callback_avx+0x488>
+  DB  196,98,125,24,21,218,12,0,0         ; vbroadcastss  0xcda(%rip),%ymm10        # 69e8 <_sk_callback_avx+0x488>
   DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
   DB  196,98,125,24,3                     ; vbroadcastss  (%rbx),%ymm8
   DB  197,60,194,192,2                    ; vcmpleps      %ymm0,%ymm8,%ymm8
@@ -10727,7 +10584,7 @@
   DB  196,227,117,24,202,1                ; vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   DB  72,131,195,4                        ; add           $0x4,%rbx
   DB  73,255,200                          ; dec           %r8
-  DB  117,205                             ; jne           5eda <_sk_gradient_avx+0x36>
+  DB  117,205                             ; jne           5d12 <_sk_gradient_avx+0x36>
   DB  196,195,249,22,200,1                ; vpextrq       $0x1,%xmm1,%r8
   DB  69,137,193                          ; mov           %r8d,%r9d
   DB  73,193,232,32                       ; shr           $0x20,%r8
@@ -10905,27 +10762,27 @@
   DB  196,65,52,95,226                    ; vmaxps        %ymm10,%ymm9,%ymm12
   DB  196,65,36,94,220                    ; vdivps        %ymm12,%ymm11,%ymm11
   DB  196,65,36,89,227                    ; vmulps        %ymm11,%ymm11,%ymm12
-  DB  196,98,125,24,45,254,8,0,0          ; vbroadcastss  0x8fe(%rip),%ymm13        # 6bb4 <_sk_callback_avx+0x48c>
+  DB  196,98,125,24,45,254,8,0,0          ; vbroadcastss  0x8fe(%rip),%ymm13        # 69ec <_sk_callback_avx+0x48c>
   DB  196,65,28,89,237                    ; vmulps        %ymm13,%ymm12,%ymm13
-  DB  196,98,125,24,53,244,8,0,0          ; vbroadcastss  0x8f4(%rip),%ymm14        # 6bb8 <_sk_callback_avx+0x490>
+  DB  196,98,125,24,53,244,8,0,0          ; vbroadcastss  0x8f4(%rip),%ymm14        # 69f0 <_sk_callback_avx+0x490>
   DB  196,65,20,88,238                    ; vaddps        %ymm14,%ymm13,%ymm13
   DB  196,65,28,89,237                    ; vmulps        %ymm13,%ymm12,%ymm13
-  DB  196,98,125,24,53,229,8,0,0          ; vbroadcastss  0x8e5(%rip),%ymm14        # 6bbc <_sk_callback_avx+0x494>
+  DB  196,98,125,24,53,229,8,0,0          ; vbroadcastss  0x8e5(%rip),%ymm14        # 69f4 <_sk_callback_avx+0x494>
   DB  196,65,20,88,238                    ; vaddps        %ymm14,%ymm13,%ymm13
   DB  196,65,28,89,229                    ; vmulps        %ymm13,%ymm12,%ymm12
-  DB  196,98,125,24,45,214,8,0,0          ; vbroadcastss  0x8d6(%rip),%ymm13        # 6bc0 <_sk_callback_avx+0x498>
+  DB  196,98,125,24,45,214,8,0,0          ; vbroadcastss  0x8d6(%rip),%ymm13        # 69f8 <_sk_callback_avx+0x498>
   DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
   DB  196,65,52,194,202,1                 ; vcmpltps      %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,21,193,8,0,0          ; vbroadcastss  0x8c1(%rip),%ymm10        # 6bc4 <_sk_callback_avx+0x49c>
+  DB  196,98,125,24,21,193,8,0,0          ; vbroadcastss  0x8c1(%rip),%ymm10        # 69fc <_sk_callback_avx+0x49c>
   DB  196,65,44,92,211                    ; vsubps        %ymm11,%ymm10,%ymm10
   DB  196,67,37,74,202,144                ; vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   DB  196,193,124,194,192,1               ; vcmpltps      %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,21,171,8,0,0          ; vbroadcastss  0x8ab(%rip),%ymm10        # 6bc8 <_sk_callback_avx+0x4a0>
+  DB  196,98,125,24,21,171,8,0,0          ; vbroadcastss  0x8ab(%rip),%ymm10        # 6a00 <_sk_callback_avx+0x4a0>
   DB  196,65,44,92,209                    ; vsubps        %ymm9,%ymm10,%ymm10
   DB  196,195,53,74,194,0                 ; vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   DB  196,65,116,194,200,1                ; vcmpltps      %ymm8,%ymm1,%ymm9
-  DB  196,98,125,24,21,149,8,0,0          ; vbroadcastss  0x895(%rip),%ymm10        # 6bcc <_sk_callback_avx+0x4a4>
+  DB  196,98,125,24,21,149,8,0,0          ; vbroadcastss  0x895(%rip),%ymm10        # 6a04 <_sk_callback_avx+0x4a4>
   DB  197,44,92,208                       ; vsubps        %ymm0,%ymm10,%ymm10
   DB  196,195,125,74,194,144              ; vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   DB  196,65,124,194,200,3                ; vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -10945,7 +10802,7 @@
 PUBLIC _sk_save_xy_avx
 _sk_save_xy_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,95,8,0,0            ; vbroadcastss  0x85f(%rip),%ymm8        # 6bd0 <_sk_callback_avx+0x4a8>
+  DB  196,98,125,24,5,95,8,0,0            ; vbroadcastss  0x85f(%rip),%ymm8        # 6a08 <_sk_callback_avx+0x4a8>
   DB  196,65,124,88,200                   ; vaddps        %ymm8,%ymm0,%ymm9
   DB  196,67,125,8,209,1                  ; vroundps      $0x1,%ymm9,%ymm10
   DB  196,65,52,92,202                    ; vsubps        %ymm10,%ymm9,%ymm9
@@ -10978,9 +10835,9 @@
 PUBLIC _sk_bilinear_nx_avx
 _sk_bilinear_nx_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,235,7,0,0          ; vbroadcastss  0x7eb(%rip),%ymm0        # 6bd4 <_sk_callback_avx+0x4ac>
+  DB  196,226,125,24,5,235,7,0,0          ; vbroadcastss  0x7eb(%rip),%ymm0        # 6a0c <_sk_callback_avx+0x4ac>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,226,7,0,0           ; vbroadcastss  0x7e2(%rip),%ymm8        # 6bd8 <_sk_callback_avx+0x4b0>
+  DB  196,98,125,24,5,226,7,0,0           ; vbroadcastss  0x7e2(%rip),%ymm8        # 6a10 <_sk_callback_avx+0x4b0>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10989,7 +10846,7 @@
 PUBLIC _sk_bilinear_px_avx
 _sk_bilinear_px_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,202,7,0,0          ; vbroadcastss  0x7ca(%rip),%ymm0        # 6bdc <_sk_callback_avx+0x4b4>
+  DB  196,226,125,24,5,202,7,0,0          ; vbroadcastss  0x7ca(%rip),%ymm0        # 6a14 <_sk_callback_avx+0x4b4>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,64,64                    ; vmovups       0x40(%rax),%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -10999,9 +10856,9 @@
 PUBLIC _sk_bilinear_ny_avx
 _sk_bilinear_ny_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,174,7,0,0         ; vbroadcastss  0x7ae(%rip),%ymm1        # 6be0 <_sk_callback_avx+0x4b8>
+  DB  196,226,125,24,13,174,7,0,0         ; vbroadcastss  0x7ae(%rip),%ymm1        # 6a18 <_sk_callback_avx+0x4b8>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,164,7,0,0           ; vbroadcastss  0x7a4(%rip),%ymm8        # 6be4 <_sk_callback_avx+0x4bc>
+  DB  196,98,125,24,5,164,7,0,0           ; vbroadcastss  0x7a4(%rip),%ymm8        # 6a1c <_sk_callback_avx+0x4bc>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -11010,7 +10867,7 @@
 PUBLIC _sk_bilinear_py_avx
 _sk_bilinear_py_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,140,7,0,0         ; vbroadcastss  0x78c(%rip),%ymm1        # 6be8 <_sk_callback_avx+0x4c0>
+  DB  196,226,125,24,13,140,7,0,0         ; vbroadcastss  0x78c(%rip),%ymm1        # 6a20 <_sk_callback_avx+0x4c0>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
   DB  197,124,16,64,96                    ; vmovups       0x60(%rax),%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -11020,14 +10877,14 @@
 PUBLIC _sk_bicubic_n3x_avx
 _sk_bicubic_n3x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,111,7,0,0          ; vbroadcastss  0x76f(%rip),%ymm0        # 6bec <_sk_callback_avx+0x4c4>
+  DB  196,226,125,24,5,111,7,0,0          ; vbroadcastss  0x76f(%rip),%ymm0        # 6a24 <_sk_callback_avx+0x4c4>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,102,7,0,0           ; vbroadcastss  0x766(%rip),%ymm8        # 6bf0 <_sk_callback_avx+0x4c8>
+  DB  196,98,125,24,5,102,7,0,0           ; vbroadcastss  0x766(%rip),%ymm8        # 6a28 <_sk_callback_avx+0x4c8>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,87,7,0,0           ; vbroadcastss  0x757(%rip),%ymm10        # 6bf4 <_sk_callback_avx+0x4cc>
+  DB  196,98,125,24,21,87,7,0,0           ; vbroadcastss  0x757(%rip),%ymm10        # 6a2c <_sk_callback_avx+0x4cc>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,77,7,0,0           ; vbroadcastss  0x74d(%rip),%ymm10        # 6bf8 <_sk_callback_avx+0x4d0>
+  DB  196,98,125,24,21,77,7,0,0           ; vbroadcastss  0x74d(%rip),%ymm10        # 6a30 <_sk_callback_avx+0x4d0>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -11037,19 +10894,19 @@
 PUBLIC _sk_bicubic_n1x_avx
 _sk_bicubic_n1x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,48,7,0,0           ; vbroadcastss  0x730(%rip),%ymm0        # 6bfc <_sk_callback_avx+0x4d4>
+  DB  196,226,125,24,5,48,7,0,0           ; vbroadcastss  0x730(%rip),%ymm0        # 6a34 <_sk_callback_avx+0x4d4>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,39,7,0,0            ; vbroadcastss  0x727(%rip),%ymm8        # 6c00 <_sk_callback_avx+0x4d8>
+  DB  196,98,125,24,5,39,7,0,0            ; vbroadcastss  0x727(%rip),%ymm8        # 6a38 <_sk_callback_avx+0x4d8>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,29,7,0,0           ; vbroadcastss  0x71d(%rip),%ymm9        # 6c04 <_sk_callback_avx+0x4dc>
+  DB  196,98,125,24,13,29,7,0,0           ; vbroadcastss  0x71d(%rip),%ymm9        # 6a3c <_sk_callback_avx+0x4dc>
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,19,7,0,0           ; vbroadcastss  0x713(%rip),%ymm10        # 6c08 <_sk_callback_avx+0x4e0>
+  DB  196,98,125,24,21,19,7,0,0           ; vbroadcastss  0x713(%rip),%ymm10        # 6a40 <_sk_callback_avx+0x4e0>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,4,7,0,0            ; vbroadcastss  0x704(%rip),%ymm10        # 6c0c <_sk_callback_avx+0x4e4>
+  DB  196,98,125,24,21,4,7,0,0            ; vbroadcastss  0x704(%rip),%ymm10        # 6a44 <_sk_callback_avx+0x4e4>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,245,6,0,0          ; vbroadcastss  0x6f5(%rip),%ymm9        # 6c10 <_sk_callback_avx+0x4e8>
+  DB  196,98,125,24,13,245,6,0,0          ; vbroadcastss  0x6f5(%rip),%ymm9        # 6a48 <_sk_callback_avx+0x4e8>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -11058,17 +10915,17 @@
 PUBLIC _sk_bicubic_p1x_avx
 _sk_bicubic_p1x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,221,6,0,0           ; vbroadcastss  0x6dd(%rip),%ymm8        # 6c14 <_sk_callback_avx+0x4ec>
+  DB  196,98,125,24,5,221,6,0,0           ; vbroadcastss  0x6dd(%rip),%ymm8        # 6a4c <_sk_callback_avx+0x4ec>
   DB  197,188,88,0                        ; vaddps        (%rax),%ymm8,%ymm0
   DB  197,124,16,72,64                    ; vmovups       0x40(%rax),%ymm9
-  DB  196,98,125,24,21,207,6,0,0          ; vbroadcastss  0x6cf(%rip),%ymm10        # 6c18 <_sk_callback_avx+0x4f0>
+  DB  196,98,125,24,21,207,6,0,0          ; vbroadcastss  0x6cf(%rip),%ymm10        # 6a50 <_sk_callback_avx+0x4f0>
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
-  DB  196,98,125,24,29,197,6,0,0          ; vbroadcastss  0x6c5(%rip),%ymm11        # 6c1c <_sk_callback_avx+0x4f4>
+  DB  196,98,125,24,29,197,6,0,0          ; vbroadcastss  0x6c5(%rip),%ymm11        # 6a54 <_sk_callback_avx+0x4f4>
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
   DB  196,65,44,88,192                    ; vaddps        %ymm8,%ymm10,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
-  DB  196,98,125,24,13,172,6,0,0          ; vbroadcastss  0x6ac(%rip),%ymm9        # 6c20 <_sk_callback_avx+0x4f8>
+  DB  196,98,125,24,13,172,6,0,0          ; vbroadcastss  0x6ac(%rip),%ymm9        # 6a58 <_sk_callback_avx+0x4f8>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -11077,13 +10934,13 @@
 PUBLIC _sk_bicubic_p3x_avx
 _sk_bicubic_p3x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,148,6,0,0          ; vbroadcastss  0x694(%rip),%ymm0        # 6c24 <_sk_callback_avx+0x4fc>
+  DB  196,226,125,24,5,148,6,0,0          ; vbroadcastss  0x694(%rip),%ymm0        # 6a5c <_sk_callback_avx+0x4fc>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,64,64                    ; vmovups       0x40(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,129,6,0,0          ; vbroadcastss  0x681(%rip),%ymm10        # 6c28 <_sk_callback_avx+0x500>
+  DB  196,98,125,24,21,129,6,0,0          ; vbroadcastss  0x681(%rip),%ymm10        # 6a60 <_sk_callback_avx+0x500>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,119,6,0,0          ; vbroadcastss  0x677(%rip),%ymm10        # 6c2c <_sk_callback_avx+0x504>
+  DB  196,98,125,24,21,119,6,0,0          ; vbroadcastss  0x677(%rip),%ymm10        # 6a64 <_sk_callback_avx+0x504>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -11093,14 +10950,14 @@
 PUBLIC _sk_bicubic_n3y_avx
 _sk_bicubic_n3y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,90,6,0,0          ; vbroadcastss  0x65a(%rip),%ymm1        # 6c30 <_sk_callback_avx+0x508>
+  DB  196,226,125,24,13,90,6,0,0          ; vbroadcastss  0x65a(%rip),%ymm1        # 6a68 <_sk_callback_avx+0x508>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,80,6,0,0            ; vbroadcastss  0x650(%rip),%ymm8        # 6c34 <_sk_callback_avx+0x50c>
+  DB  196,98,125,24,5,80,6,0,0            ; vbroadcastss  0x650(%rip),%ymm8        # 6a6c <_sk_callback_avx+0x50c>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,65,6,0,0           ; vbroadcastss  0x641(%rip),%ymm10        # 6c38 <_sk_callback_avx+0x510>
+  DB  196,98,125,24,21,65,6,0,0           ; vbroadcastss  0x641(%rip),%ymm10        # 6a70 <_sk_callback_avx+0x510>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,55,6,0,0           ; vbroadcastss  0x637(%rip),%ymm10        # 6c3c <_sk_callback_avx+0x514>
+  DB  196,98,125,24,21,55,6,0,0           ; vbroadcastss  0x637(%rip),%ymm10        # 6a74 <_sk_callback_avx+0x514>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -11110,19 +10967,19 @@
 PUBLIC _sk_bicubic_n1y_avx
 _sk_bicubic_n1y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,26,6,0,0          ; vbroadcastss  0x61a(%rip),%ymm1        # 6c40 <_sk_callback_avx+0x518>
+  DB  196,226,125,24,13,26,6,0,0          ; vbroadcastss  0x61a(%rip),%ymm1        # 6a78 <_sk_callback_avx+0x518>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,16,6,0,0            ; vbroadcastss  0x610(%rip),%ymm8        # 6c44 <_sk_callback_avx+0x51c>
+  DB  196,98,125,24,5,16,6,0,0            ; vbroadcastss  0x610(%rip),%ymm8        # 6a7c <_sk_callback_avx+0x51c>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,6,6,0,0            ; vbroadcastss  0x606(%rip),%ymm9        # 6c48 <_sk_callback_avx+0x520>
+  DB  196,98,125,24,13,6,6,0,0            ; vbroadcastss  0x606(%rip),%ymm9        # 6a80 <_sk_callback_avx+0x520>
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,252,5,0,0          ; vbroadcastss  0x5fc(%rip),%ymm10        # 6c4c <_sk_callback_avx+0x524>
+  DB  196,98,125,24,21,252,5,0,0          ; vbroadcastss  0x5fc(%rip),%ymm10        # 6a84 <_sk_callback_avx+0x524>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,237,5,0,0          ; vbroadcastss  0x5ed(%rip),%ymm10        # 6c50 <_sk_callback_avx+0x528>
+  DB  196,98,125,24,21,237,5,0,0          ; vbroadcastss  0x5ed(%rip),%ymm10        # 6a88 <_sk_callback_avx+0x528>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,222,5,0,0          ; vbroadcastss  0x5de(%rip),%ymm9        # 6c54 <_sk_callback_avx+0x52c>
+  DB  196,98,125,24,13,222,5,0,0          ; vbroadcastss  0x5de(%rip),%ymm9        # 6a8c <_sk_callback_avx+0x52c>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -11131,17 +10988,17 @@
 PUBLIC _sk_bicubic_p1y_avx
 _sk_bicubic_p1y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,198,5,0,0           ; vbroadcastss  0x5c6(%rip),%ymm8        # 6c58 <_sk_callback_avx+0x530>
+  DB  196,98,125,24,5,198,5,0,0           ; vbroadcastss  0x5c6(%rip),%ymm8        # 6a90 <_sk_callback_avx+0x530>
   DB  197,188,88,72,32                    ; vaddps        0x20(%rax),%ymm8,%ymm1
   DB  197,124,16,72,96                    ; vmovups       0x60(%rax),%ymm9
-  DB  196,98,125,24,21,183,5,0,0          ; vbroadcastss  0x5b7(%rip),%ymm10        # 6c5c <_sk_callback_avx+0x534>
+  DB  196,98,125,24,21,183,5,0,0          ; vbroadcastss  0x5b7(%rip),%ymm10        # 6a94 <_sk_callback_avx+0x534>
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
-  DB  196,98,125,24,29,173,5,0,0          ; vbroadcastss  0x5ad(%rip),%ymm11        # 6c60 <_sk_callback_avx+0x538>
+  DB  196,98,125,24,29,173,5,0,0          ; vbroadcastss  0x5ad(%rip),%ymm11        # 6a98 <_sk_callback_avx+0x538>
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
   DB  196,65,44,88,192                    ; vaddps        %ymm8,%ymm10,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
-  DB  196,98,125,24,13,148,5,0,0          ; vbroadcastss  0x594(%rip),%ymm9        # 6c64 <_sk_callback_avx+0x53c>
+  DB  196,98,125,24,13,148,5,0,0          ; vbroadcastss  0x594(%rip),%ymm9        # 6a9c <_sk_callback_avx+0x53c>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -11150,13 +11007,13 @@
 PUBLIC _sk_bicubic_p3y_avx
 _sk_bicubic_p3y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,124,5,0,0         ; vbroadcastss  0x57c(%rip),%ymm1        # 6c68 <_sk_callback_avx+0x540>
+  DB  196,226,125,24,13,124,5,0,0         ; vbroadcastss  0x57c(%rip),%ymm1        # 6aa0 <_sk_callback_avx+0x540>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
   DB  197,124,16,64,96                    ; vmovups       0x60(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,104,5,0,0          ; vbroadcastss  0x568(%rip),%ymm10        # 6c6c <_sk_callback_avx+0x544>
+  DB  196,98,125,24,21,104,5,0,0          ; vbroadcastss  0x568(%rip),%ymm10        # 6aa4 <_sk_callback_avx+0x544>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,94,5,0,0           ; vbroadcastss  0x55e(%rip),%ymm10        # 6c70 <_sk_callback_avx+0x548>
+  DB  196,98,125,24,21,94,5,0,0           ; vbroadcastss  0x55e(%rip),%ymm10        # 6aa8 <_sk_callback_avx+0x548>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -11270,25 +11127,25 @@
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 68f5 <.literal4+0xb1>
+  DB  71,225,61                           ; rex.RXB       loope 672d <.literal4+0xb1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,154                          ; cmpb          $0x9a,(%rdi)
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 6905 <.literal4+0xc1>
+  DB  71,225,61                           ; rex.RXB       loope 673d <.literal4+0xc1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,154                          ; cmpb          $0x9a,(%rdi)
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 6915 <.literal4+0xd1>
+  DB  71,225,61                           ; rex.RXB       loope 674d <.literal4+0xd1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,154                          ; cmpb          $0x9a,(%rdi)
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 6925 <.literal4+0xe1>
+  DB  71,225,61                           ; rex.RXB       loope 675d <.literal4+0xe1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,127                    ; add           %al,0x7f00003f(%rax)
@@ -11340,7 +11197,7 @@
   DB  190,129,128,128,59                  ; mov           $0x3b808081,%esi
   DB  129,128,128,59,0,248,0,0,8,33       ; addl          $0x21080000,-0x7ffc480(%rax)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        6979 <.literal4+0x135>
+  DB  224,7                               ; loopne        67b1 <.literal4+0x135>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -11356,10 +11213,10 @@
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
   DB  0,52,255                            ; add           %dh,(%rdi,%rdi,8)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            69a0 <.literal4+0x15c>
+  DB  127,0                               ; jg            67d8 <.literal4+0x15c>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            6a19 <.literal4+0x1d5>
+  DB  119,115                             ; ja            6851 <.literal4+0x1d5>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -11373,10 +11230,10 @@
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            69d4 <.literal4+0x190>
+  DB  127,0                               ; jg            680c <.literal4+0x190>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            6a4d <.literal4+0x209>
+  DB  119,115                             ; ja            6885 <.literal4+0x209>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -11390,10 +11247,10 @@
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            6a08 <.literal4+0x1c4>
+  DB  127,0                               ; jg            6840 <.literal4+0x1c4>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            6a81 <.literal4+0x23d>
+  DB  119,115                             ; ja            68b9 <.literal4+0x23d>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -11407,10 +11264,10 @@
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            6a3c <.literal4+0x1f8>
+  DB  127,0                               ; jg            6874 <.literal4+0x1f8>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            6ab5 <.literal4+0x271>
+  DB  119,115                             ; ja            68ed <.literal4+0x271>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -11423,7 +11280,7 @@
   DB  0,75,0                              ; add           %cl,0x0(%rbx)
   DB  0,128,63,0,0,200                    ; add           %al,-0x37ffffc1(%rax)
   DB  66,0,0                              ; rex.X         add %al,(%rax)
-  DB  127,67                              ; jg            6ab3 <.literal4+0x26f>
+  DB  127,67                              ; jg            68eb <.literal4+0x26f>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,195                               ; add           %al,%bl
   DB  0,0                                 ; add           %al,(%rax)
@@ -11435,10 +11292,10 @@
   DB  190,80,128,3,62                     ; mov           $0x3e038050,%esi
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           6ad3 <.literal4+0x28f>
+  DB  118,63                              ; jbe           690b <.literal4+0x28f>
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
-  DB  127,67                              ; jg            6ae7 <.literal4+0x2a3>
+  DB  127,67                              ; jg            691f <.literal4+0x2a3>
   DB  129,128,128,59,0,0,128,63,129,128   ; addl          $0x80813f80,0x3b80(%rax)
   DB  128,59,0                            ; cmpb          $0x0,(%rbx)
   DB  0,128,63,129,128,128                ; add           %al,-0x7f7f7ec1(%rax)
@@ -11447,7 +11304,7 @@
   DB  0,0                                 ; add           %al,(%rax)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        6ac9 <.literal4+0x285>
+  DB  224,7                               ; loopne        6901 <.literal4+0x285>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -11459,7 +11316,7 @@
   DB  0,0                                 ; add           %al,(%rax)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        6ae5 <.literal4+0x2a1>
+  DB  224,7                               ; loopne        691d <.literal4+0x2a1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -11470,7 +11327,7 @@
   DB  0,0                                 ; add           %al,(%rax)
   DB  248                                 ; clc
   DB  65,0,0                              ; add           %al,(%r8)
-  DB  124,66                              ; jl            6b3a <.literal4+0x2f6>
+  DB  124,66                              ; jl            6972 <.literal4+0x2f6>
   DB  0,240                               ; add           %dh,%al
   DB  0,0                                 ; add           %al,(%rax)
   DB  137,136,136,55,0,15                 ; mov           %ecx,0xf003788(%rax)
@@ -11488,9 +11345,9 @@
   DB  137,136,136,59,15,0                 ; mov           %ecx,0xf3b88(%rax)
   DB  0,0                                 ; add           %al,(%rax)
   DB  137,136,136,61,0,0                  ; mov           %ecx,0x3d88(%rax)
-  DB  112,65                              ; jo            6b7d <.literal4+0x339>
+  DB  112,65                              ; jo            69b5 <.literal4+0x339>
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
-  DB  127,67                              ; jg            6b8b <.literal4+0x347>
+  DB  127,67                              ; jg            69c3 <.literal4+0x347>
   DB  0,128,0,0,0,0                       ; add           %al,0x0(%rax)
   DB  0,128,0,4,0,128                     ; add           %al,-0x7ffffc00(%rax)
   DB  0,0                                 ; add           %al,(%rax)
@@ -11506,7 +11363,7 @@
   DB  0,128,55,0,0,128                    ; add           %al,-0x7fffffc9(%rax)
   DB  63                                  ; (bad)
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            6bcb <.literal4+0x387>
+  DB  127,71                              ; jg            6a03 <.literal4+0x387>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,191,0,0,0                     ; add           %al,0xbf(%rax)
@@ -11602,6 +11459,88 @@
   DB  170                                 ; stos          %al,%es:(%rdi)
   DB  190                                 ; .byte         0xbe
 
+ALIGN 16
+  DB  0,2                                 ; add           %al,(%rdx)
+  DB  4,6                                 ; add           $0x6,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  8,10                                ; or            %cl,(%rdx)
+  DB  12,14                               ; or            $0xe,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,2                                 ; add           %al,(%rdx)
+  DB  4,6                                 ; add           $0x6,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  8,10                                ; or            %cl,(%rdx)
+  DB  12,14                               ; or            $0xe,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  0,2                                 ; add           %al,(%rdx)
+  DB  4,6                                 ; add           $0x6,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  8,10                                ; or            %cl,(%rdx)
+  DB  12,14                               ; or            $0xe,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,2                                 ; add           %al,(%rdx)
+  DB  4,6                                 ; add           $0x6,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  8,10                                ; or            %cl,(%rdx)
+  DB  12,14                               ; or            $0xe,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,0                                 ; add           %al,(%rax)
+
 ALIGN 32
   DB  255,0                               ; incl          (%rax)
   DB  0,0                                 ; add           %al,(%rax)
@@ -11683,24 +11622,6 @@
   DB  0,0                                 ; add           %al,(%rax)
   DB  255,0                               ; incl          (%rax)
   DB  0,0                                 ; add           %al,(%rax)
-
-ALIGN 16
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
 ALIGN 32
 
 PUBLIC _sk_start_pipeline_sse41
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index 4b506ef..e570af1 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -176,8 +176,8 @@
     }
 #endif
 
-// AVX2 adds some mask loads and stores that make for shorter, faster code.
-#if defined(JUMPER) && defined(__AVX2__)
+// AVX adds some mask loads and stores that make for shorter, faster code.
+#if defined(JUMPER) && defined(__AVX__)
     SI U32 mask(size_t tail) {
         // We go a little out of our way to avoid needing large constant values here.
 
@@ -186,14 +186,16 @@
         uint64_t mask = 0xffffffffffffffff >> 8*(kStride-tail);
 
         // Sign-extend each mask lane to its full width, 0x00000000 or 0xffffffff.
-        return _mm256_cvtepi8_epi32(_mm_cvtsi64_si128((int64_t)mask));
+        using S8  = int8_t  __attribute__((ext_vector_type(8)));
+        using S32 = int32_t __attribute__((ext_vector_type(8)));
+        return (U32)__builtin_convertvector(unaligned_load<S8>(&mask), S32);
     }
 
     template <>
     inline U32 load(const uint32_t* src, size_t tail) {
         __builtin_assume(tail < kStride);
         if (__builtin_expect(tail, 0)) {
-            return _mm256_maskload_epi32((const int*)src, mask(tail));
+            return (U32)_mm256_maskload_ps((const float*)src, mask(tail));
         }
         return unaligned_load<U32>(src);
     }
@@ -202,7 +204,7 @@
     inline void store(uint32_t* dst, U32 v, size_t tail) {
         __builtin_assume(tail < kStride);
         if (__builtin_expect(tail, 0)) {
-            return _mm256_maskstore_epi32((int*)dst, mask(tail), v);
+            return _mm256_maskstore_ps((float*)dst, mask(tail), (F)v);
         }
         unaligned_store(dst, v);
     }