Some lowp refactoring

  1) Move a couple of stages around in the enum to places
     that make more sense, and gauss_a_to_rgba in the code too.

  2) Mirror the SkRasterPipeline stage enum with one of:
     LOWP(st): the stage is implemented in low precision
     TODO(st): the stage should be lowp, but isn't
     NOPE(st): the stage shouldn't be done in lowp.

  3) Statically enforce that all stages are covered by one of
     LOWP, TODO, or NOPE.

Change-Id: I06c7a7e470663ef73bf652c1b65c0d3c89f0d767
Reviewed-on: https://skia-review.googlesource.com/63800
Reviewed-by: Florin Malita <fmalita@chromium.org>
Commit-Queue: Mike Klein <mtklein@chromium.org>
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index f826f39..60464b0 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -36,62 +36,63 @@
  * If you'd like to see how this works internally, you want to start digging around src/jumper.
  */
 
-#define SK_RASTER_PIPELINE_STAGES(M)                             \
-    M(callback)                                                  \
-    M(move_src_dst) M(move_dst_src)                              \
-    M(clamp_0) M(clamp_1) M(clamp_a) M(clamp_a_dst)              \
-    M(unpremul) M(premul) M(premul_dst)                          \
-    M(set_rgb) M(swap_rb) M(invert)                              \
-    M(from_srgb) M(from_srgb_dst) M(to_srgb)                     \
-    M(black_color) M(white_color) M(uniform_color)               \
-    M(seed_shader) M(dither)                                     \
-    M(load_a8)   M(load_a8_dst)   M(store_a8)   M(gather_a8)     \
-    M(load_g8)   M(load_g8_dst)                 M(gather_g8)     \
-    M(load_565)  M(load_565_dst)  M(store_565)  M(gather_565)    \
-    M(load_4444) M(load_4444_dst) M(store_4444) M(gather_4444)   \
-    M(load_f16)  M(load_f16_dst)  M(store_f16)  M(gather_f16)    \
-    M(load_f32)  M(load_f32_dst)  M(store_f32)                   \
-    M(load_8888) M(load_8888_dst) M(store_8888) M(gather_8888)   \
-    M(load_bgra) M(load_bgra_dst) M(store_bgra) M(gather_bgra)   \
-    M(load_u16_be) M(load_rgb_u16_be) M(store_u16_be)            \
-    M(load_tables_u16_be) M(load_tables_rgb_u16_be)              \
-    M(load_tables) M(load_rgba) M(store_rgba)                    \
-    M(scale_u8) M(scale_565) M(scale_1_float)                    \
-    M( lerp_u8) M( lerp_565) M( lerp_1_float)                    \
-    M(dstatop) M(dstin) M(dstout) M(dstover)                     \
-    M(srcatop) M(srcin) M(srcout) M(srcover)                     \
-    M(clear) M(modulate) M(multiply) M(plus_) M(screen) M(xor_)  \
-    M(colorburn) M(colordodge) M(darken) M(difference)           \
-    M(exclusion) M(hardlight) M(lighten) M(overlay) M(softlight) \
-    M(hue) M(saturation) M(color) M(luminosity)                  \
-    M(srcover_rgba_8888) M(srcover_bgra_8888)                    \
-    M(luminance_to_alpha)                                        \
-    M(matrix_translate) M(matrix_scale_translate)                \
-    M(matrix_2x3) M(matrix_3x4) M(matrix_4x5) M(matrix_4x3)      \
-    M(matrix_perspective)                                        \
-    M(parametric_r) M(parametric_g) M(parametric_b)              \
-    M(parametric_a) M(gamma)                                     \
-    M(table_r) M(table_g) M(table_b) M(table_a)                  \
-    M(lab_to_xyz)                                                \
-                 M(mirror_x)   M(repeat_x)                       \
-                 M(mirror_y)   M(repeat_y)                       \
-    M(clamp_x_1) M(mirror_x_1) M(repeat_x_1)                     \
-    M(bilinear_nx) M(bilinear_px) M(bilinear_ny) M(bilinear_py)  \
-    M(bicubic_n3x) M(bicubic_n1x) M(bicubic_p1x) M(bicubic_p3x)  \
-    M(bicubic_n3y) M(bicubic_n1y) M(bicubic_p1y) M(bicubic_p3y)  \
-    M(save_xy) M(accumulate)                                     \
-    M(evenly_spaced_gradient)                                    \
-    M(gauss_a_to_rgba) M(gradient)                               \
-    M(evenly_spaced_2_stop_gradient)                             \
-    M(xy_to_unit_angle)                                          \
-    M(xy_to_radius)                                              \
-    M(xy_to_2pt_conical_quadratic_min)                           \
-    M(xy_to_2pt_conical_quadratic_max)                           \
-    M(xy_to_2pt_conical_linear)                                  \
-    M(mask_2pt_conical_degenerates) M(apply_vector_mask)         \
-    M(byte_tables) M(byte_tables_rgb)                            \
-    M(rgb_to_hsl) M(hsl_to_rgb)                                  \
-    M(clut_3D) M(clut_4D)
+#define SK_RASTER_PIPELINE_STAGES(M)                               \
+    M(callback)                                                    \
+    M(move_src_dst) M(move_dst_src)                                \
+    M(clamp_0) M(clamp_1) M(clamp_a) M(clamp_a_dst)                \
+    M(unpremul) M(premul) M(premul_dst)                            \
+    M(set_rgb) M(swap_rb) M(invert)                                \
+    M(from_srgb) M(from_srgb_dst) M(to_srgb)                       \
+    M(black_color) M(white_color) M(uniform_color)                 \
+    M(seed_shader) M(dither)                                       \
+    M(load_a8)   M(load_a8_dst)   M(store_a8)   M(gather_a8)       \
+    M(load_g8)   M(load_g8_dst)                 M(gather_g8)       \
+    M(load_565)  M(load_565_dst)  M(store_565)  M(gather_565)      \
+    M(load_4444) M(load_4444_dst) M(store_4444) M(gather_4444)     \
+    M(load_f16)  M(load_f16_dst)  M(store_f16)  M(gather_f16)      \
+    M(load_f32)  M(load_f32_dst)  M(store_f32)                     \
+    M(load_8888) M(load_8888_dst) M(store_8888) M(gather_8888)     \
+    M(load_bgra) M(load_bgra_dst) M(store_bgra) M(gather_bgra)     \
+    M(load_u16_be) M(load_rgb_u16_be) M(store_u16_be)              \
+    M(load_tables_u16_be) M(load_tables_rgb_u16_be) M(load_tables) \
+    M(load_rgba) M(store_rgba)                                     \
+    M(scale_u8) M(scale_565) M(scale_1_float)                      \
+    M( lerp_u8) M( lerp_565) M( lerp_1_float)                      \
+    M(dstatop) M(dstin) M(dstout) M(dstover)                       \
+    M(srcatop) M(srcin) M(srcout) M(srcover)                       \
+    M(clear) M(modulate) M(multiply) M(plus_) M(screen) M(xor_)    \
+    M(colorburn) M(colordodge) M(darken) M(difference)             \
+    M(exclusion) M(hardlight) M(lighten) M(overlay) M(softlight)   \
+    M(hue) M(saturation) M(color) M(luminosity)                    \
+    M(srcover_rgba_8888) M(srcover_bgra_8888)                      \
+    M(luminance_to_alpha)                                          \
+    M(matrix_translate) M(matrix_scale_translate)                  \
+    M(matrix_2x3) M(matrix_3x4) M(matrix_4x5) M(matrix_4x3)        \
+    M(matrix_perspective)                                          \
+    M(parametric_r) M(parametric_g) M(parametric_b)                \
+    M(parametric_a) M(gamma)                                       \
+    M(table_r) M(table_g) M(table_b) M(table_a)                    \
+    M(lab_to_xyz)                                                  \
+                 M(mirror_x)   M(repeat_x)                         \
+                 M(mirror_y)   M(repeat_y)                         \
+    M(clamp_x_1) M(mirror_x_1) M(repeat_x_1)                       \
+    M(bilinear_nx) M(bilinear_px) M(bilinear_ny) M(bilinear_py)    \
+    M(bicubic_n3x) M(bicubic_n1x) M(bicubic_p1x) M(bicubic_p3x)    \
+    M(bicubic_n3y) M(bicubic_n1y) M(bicubic_p1y) M(bicubic_p3y)    \
+    M(save_xy) M(accumulate)                                       \
+    M(evenly_spaced_gradient)                                      \
+    M(gradient)                                                    \
+    M(evenly_spaced_2_stop_gradient)                               \
+    M(xy_to_unit_angle)                                            \
+    M(xy_to_radius)                                                \
+    M(xy_to_2pt_conical_quadratic_min)                             \
+    M(xy_to_2pt_conical_quadratic_max)                             \
+    M(xy_to_2pt_conical_linear)                                    \
+    M(mask_2pt_conical_degenerates) M(apply_vector_mask)           \
+    M(byte_tables) M(byte_tables_rgb)                              \
+    M(rgb_to_hsl) M(hsl_to_rgb)                                    \
+    M(clut_3D) M(clut_4D)                                          \
+    M(gauss_a_to_rgba)
 
 class SkRasterPipeline {
 public:
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp
index bd46723..3faee51 100644
--- a/src/jumper/SkJumper.cpp
+++ b/src/jumper/SkJumper.cpp
@@ -148,13 +148,13 @@
 #if SK_JUMPER_USE_ASSEMBLY
     #if defined(__x86_64__) || defined(_M_X64)
         template <SkRasterPipeline::StockStage st>
-        static constexpr StageFn* hsw_lowp() { return nullptr; }
+        static constexpr StageFn* hsw_lowp();
 
         template <SkRasterPipeline::StockStage st>
-        static constexpr StageFn* sse41_lowp() { return nullptr; }
+        static constexpr StageFn* sse41_lowp();
 
         template <SkRasterPipeline::StockStage st>
-        static constexpr StageFn* sse2_lowp() { return nullptr; }
+        static constexpr StageFn* sse2_lowp();
 
         #define LOWP(st) \
             template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() {   \
@@ -166,72 +166,111 @@
             template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                 return ASM(st,sse2_lowp);                                       \
             }
+        #define NOPE(st) \
+            template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() {   \
+                return nullptr;                                                 \
+            }                                                                   \
+            template <> constexpr StageFn* sse41_lowp<SkRasterPipeline::st>() { \
+                return nullptr;                                                 \
+            }                                                                   \
+            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
+                return nullptr;                                                 \
+            }
 
     #elif defined(__i386__) || defined(_M_IX86)
         template <SkRasterPipeline::StockStage st>
-        static constexpr StageFn* sse2_lowp() { return nullptr; }
+        static constexpr StageFn* sse2_lowp();
 
         #define LOWP(st) \
             template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
                 return ASM(st,sse2_lowp);                                       \
             }
+        #define NOPE(st) \
+            template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() {  \
+                return nullptr;                                                 \
+            }
 
     #elif defined(JUMPER_NEON_HAS_LOWP)
         template <SkRasterPipeline::StockStage st>
-        static constexpr StageFn* neon_lowp() { return nullptr; }
+        static constexpr StageFn* neon_lowp();
 
         #define LOWP(st)                                                         \
             template <> constexpr StageFn* neon_lowp<SkRasterPipeline::st>() {   \
                 return sk_##st##_lowp;                                           \
             }
+        #define NOPE(st)                                                         \
+            template <> constexpr StageFn* neon_lowp<SkRasterPipeline::st>() {   \
+                return nullptr;                                                  \
+            }
+
     #else
         #define LOWP(st)
+        #define NOPE(st)
 
     #endif
 
+    #define TODO(st) NOPE(st)  // stages that should be implemented in lowp, but aren't.
+
+    NOPE(callback)
+    LOWP(move_src_dst) LOWP(move_dst_src)
+    NOPE(clamp_0) NOPE(clamp_1) TODO(clamp_a) TODO(clamp_a_dst)
+    NOPE(unpremul) LOWP(premul) TODO(premul_dst)
+    LOWP(set_rgb) LOWP(swap_rb) LOWP(invert)
+    NOPE(from_srgb) NOPE(from_srgb_dst) NOPE(to_srgb)
     LOWP(black_color) LOWP(white_color) LOWP(uniform_color)
-    LOWP(set_rgb)
-    LOWP(premul)
-    LOWP(luminance_to_alpha)
-    LOWP(load_8888) LOWP(load_8888_dst) LOWP(store_8888)
-    LOWP(load_bgra) LOWP(load_bgra_dst) LOWP(store_bgra)
-    LOWP(load_a8)   LOWP(load_a8_dst)   LOWP(store_a8)
-    LOWP(load_g8)   LOWP(load_g8_dst)
-    LOWP(load_565)  LOWP(load_565_dst)  LOWP(store_565)
-    LOWP(swap_rb)
+    LOWP(seed_shader) TODO(dither)
+    LOWP(load_a8)   LOWP(load_a8_dst)   LOWP(store_a8)   LOWP(gather_a8)
+    LOWP(load_g8)   LOWP(load_g8_dst)                    LOWP(gather_g8)
+    LOWP(load_565)  LOWP(load_565_dst)  LOWP(store_565)  LOWP(gather_565)
+    TODO(load_4444) TODO(load_4444_dst) TODO(store_4444) TODO(gather_4444)
+    NOPE(load_f16)  NOPE(load_f16_dst)  NOPE(store_f16)  NOPE(gather_f16)
+    NOPE(load_f32)  NOPE(load_f32_dst)  NOPE(store_f32)
+    LOWP(load_8888) LOWP(load_8888_dst) LOWP(store_8888) LOWP(gather_8888)
+    LOWP(load_bgra) LOWP(load_bgra_dst) LOWP(store_bgra) LOWP(gather_bgra)
+    TODO(load_u16_be) TODO(load_rgb_u16_be) TODO(store_u16_be)
+    NOPE(load_tables_u16_be) NOPE(load_tables_rgb_u16_be) NOPE(load_tables)
+    NOPE(load_rgba) NOPE(store_rgba)
+    LOWP(scale_u8) LOWP(scale_565) LOWP(scale_1_float)
+    LOWP( lerp_u8) LOWP( lerp_565) LOWP( lerp_1_float)
+    LOWP(dstatop) LOWP(dstin) LOWP(dstout) LOWP(dstover)
+    LOWP(srcatop) LOWP(srcin) LOWP(srcout) LOWP(srcover)
+    LOWP(clear) LOWP(modulate) LOWP(multiply) LOWP(plus_) LOWP(screen) LOWP(xor_)
+    NOPE(colorburn) NOPE(colordodge) LOWP(darken) LOWP(difference)
+    LOWP(exclusion) LOWP(hardlight) LOWP(lighten) LOWP(overlay) NOPE(softlight)
+    NOPE(hue) NOPE(saturation) NOPE(color) NOPE(luminosity)
     LOWP(srcover_rgba_8888) LOWP(srcover_bgra_8888)
-    LOWP(lerp_1_float)
-    LOWP(lerp_u8)
-    LOWP(lerp_565)
-    LOWP(scale_1_float)
-    LOWP(scale_u8)
-    LOWP(scale_565)
-    LOWP(move_src_dst)
-    LOWP(move_dst_src)
-    LOWP(clear)
-    LOWP(srcatop)
-    LOWP(dstatop)
-    LOWP(srcin)
-    LOWP(dstin)
-    LOWP(srcout)
-    LOWP(dstout)
-    LOWP(srcover)
-    LOWP(dstover)
-    LOWP(modulate)
-    LOWP(multiply)
-    LOWP(screen)
-    LOWP(xor_)
-    LOWP(plus_)
-    LOWP(darken)
-    LOWP(lighten)
-    LOWP(difference)
-    LOWP(exclusion)
-    LOWP(hardlight)
-    LOWP(overlay)
-    LOWP(seed_shader)
-    LOWP(matrix_translate) LOWP(matrix_scale_translate) LOWP(matrix_2x3) LOWP(matrix_perspective)
-    LOWP(gather_8888) LOWP(gather_bgra) LOWP(gather_565) LOWP(gather_a8) LOWP(gather_g8)
+    LOWP(luminance_to_alpha)
+    LOWP(matrix_translate) LOWP(matrix_scale_translate)
+    LOWP(matrix_2x3) NOPE(matrix_3x4) TODO(matrix_4x5) TODO(matrix_4x3)
+    LOWP(matrix_perspective)
+    NOPE(parametric_r) NOPE(parametric_g) NOPE(parametric_b)
+    NOPE(parametric_a) NOPE(gamma)
+    NOPE(table_r) NOPE(table_g) NOPE(table_b) NOPE(table_a)
+    NOPE(lab_to_xyz)
+                    TODO(mirror_x)   TODO(repeat_x)
+                    TODO(mirror_y)   TODO(repeat_y)
+    TODO(clamp_x_1) TODO(mirror_x_1) TODO(repeat_x_1)
+    TODO(bilinear_nx) TODO(bilinear_px) TODO(bilinear_ny) TODO(bilinear_py)
+    TODO(bicubic_n3x) TODO(bicubic_n1x) TODO(bicubic_p1x) TODO(bicubic_p3x)
+    TODO(bicubic_n3y) TODO(bicubic_n1y) TODO(bicubic_p1y) TODO(bicubic_p3y)
+    TODO(save_xy) TODO(accumulate)
+    TODO(evenly_spaced_gradient)
+    TODO(gradient)
+    TODO(evenly_spaced_2_stop_gradient)
+    TODO(xy_to_unit_angle)
+    TODO(xy_to_radius)
+    TODO(xy_to_2pt_conical_quadratic_min)
+    TODO(xy_to_2pt_conical_quadratic_max)
+    TODO(xy_to_2pt_conical_linear)
+    TODO(mask_2pt_conical_degenerates) TODO(apply_vector_mask)
+    TODO(byte_tables) TODO(byte_tables_rgb)
+    NOPE(rgb_to_hsl) NOPE(hsl_to_rgb)
+    NOPE(clut_3D) NOPE(clut_4D)
+    NOPE(gauss_a_to_rgba)
+
     #undef LOWP
+    #undef TODO
+    #undef NOPE
 #endif
 
 // Engines comprise everything we need to run SkRasterPipelines.
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index b7b6710..fa97122 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -7027,52 +7027,6 @@
   .long  0xe28dd004                          // add           sp, sp, #4
   .long  0xe8bd4ff0                          // pop           {r4, r5, r6, r7, r8, r9, sl, fp, lr}
   .long  0xe12fff12                          // bx            r2
-  .long  0xe320f000                          // nop           {0}
-
-HIDDEN _sk_gauss_a_to_rgba_vfp4
-.globl _sk_gauss_a_to_rgba_vfp4
-FUNCTION(_sk_gauss_a_to_rgba_vfp4)
-_sk_gauss_a_to_rgba_vfp4:
-  .long  0xe28f2048                          // add           r2, pc, #72
-  .long  0xf4620acf                          // vld1.64       {d16-d17}, [r2]
-  .long  0xe28f2050                          // add           r2, pc, #80
-  .long  0xf4622acf                          // vld1.64       {d18-d19}, [r2]
-  .long  0xe28f2058                          // add           r2, pc, #88
-  .long  0xf2462c70                          // vfma.f32      q9, q3, q8
-  .long  0xf4620acf                          // vld1.64       {d16-d17}, [r2]
-  .long  0xe28f205c                          // add           r2, pc, #92
-  .long  0xf2460c72                          // vfma.f32      q8, q3, q9
-  .long  0xf4622acf                          // vld1.64       {d18-d19}, [r2]
-  .long  0xe28f2060                          // add           r2, pc, #96
-  .long  0xf4220acf                          // vld1.64       {d0-d1}, [r2]
-  .long  0xe4912004                          // ldr           r2, [r1], #4
-  .long  0xf2462c70                          // vfma.f32      q9, q3, q8
-  .long  0xf2060c72                          // vfma.f32      q0, q3, q9
-  .long  0xf2202150                          // vorr          q1, q0, q0
-  .long  0xf2204150                          // vorr          q2, q0, q0
-  .long  0xf2206150                          // vorr          q3, q0, q0
-  .long  0xe12fff12                          // bx            r2
-  .long  0xe320f000                          // nop           {0}
-  .long  0xc011102d                          // .word         0xc011102d
-  .long  0xc011102d                          // .word         0xc011102d
-  .long  0xc011102d                          // .word         0xc011102d
-  .long  0xc011102d                          // .word         0xc011102d
-  .long  0x40397812                          // .word         0x40397812
-  .long  0x40397812                          // .word         0x40397812
-  .long  0x40397812                          // .word         0x40397812
-  .long  0x40397812                          // .word         0x40397812
-  .long  0x3e5a9420                          // .word         0x3e5a9420
-  .long  0x3e5a9420                          // .word         0x3e5a9420
-  .long  0x3e5a9420                          // .word         0x3e5a9420
-  .long  0x3e5a9420                          // .word         0x3e5a9420
-  .long  0x3e1e9d04                          // .word         0x3e1e9d04
-  .long  0x3e1e9d04                          // .word         0x3e1e9d04
-  .long  0x3e1e9d04                          // .word         0x3e1e9d04
-  .long  0x3e1e9d04                          // .word         0x3e1e9d04
-  .long  0x39a11800                          // .word         0x39a11800
-  .long  0x39a11800                          // .word         0x39a11800
-  .long  0x39a11800                          // .word         0x39a11800
-  .long  0x39a11800                          // .word         0x39a11800
 
 HIDDEN _sk_gradient_vfp4
 .globl _sk_gradient_vfp4
@@ -7087,7 +7041,7 @@
   .long  0xf2c00050                          // vmov.i32      q8, #0
   .long  0xe5923000                          // ldr           r3, [r2]
   .long  0xe3530002                          // cmp           r3, #2
-  .long  0x3a00000a                          // bcc           649c <sk_gradient_vfp4+0x54>
+  .long  0x3a00000a                          // bcc           63f8 <sk_gradient_vfp4+0x54>
   .long  0xe5927024                          // ldr           r7, [r2, #36]
   .long  0xf2c04051                          // vmov.i32      q10, #1
   .long  0xf2c00050                          // vmov.i32      q8, #0
@@ -7098,7 +7052,7 @@
   .long  0xf3468ee8                          // vcge.f32      q12, q11, q12
   .long  0xf35481f2                          // vbsl          q12, q10, q9
   .long  0xf26008e8                          // vadd.i32      q8, q8, q12
-  .long  0x1afffff9                          // bne           6484 <sk_gradient_vfp4+0x3c>
+  .long  0x1afffff9                          // bne           63e0 <sk_gradient_vfp4+0x3c>
   .long  0xee314b90                          // vmov.32       r4, d17[1]
   .long  0xe5926010                          // ldr           r6, [r2, #16]
   .long  0xee11cb90                          // vmov.32       ip, d17[0]
@@ -7217,6 +7171,7 @@
   .long  0xf22001f0                          // vorr          q0, q8, q8
   .long  0xe8bd4010                          // pop           {r4, lr}
   .long  0xe12fff1c                          // bx            ip
+  .long  0xe320f000                          // nop           {0}
 
 HIDDEN _sk_xy_to_unit_angle_vfp4
 .globl _sk_xy_to_unit_angle_vfp4
@@ -8242,7 +8197,7 @@
   .long  0xe0835105                          // add           r5, r3, r5, lsl #2
   .long  0xedd55a00                          // vldr          s11, [r5]
   .long  0xee325b90                          // vmov.32       r5, d18[1]
-  .long  0xea000004                          // b             74b0 <sk_clut_3D_vfp4+0x508>
+  .long  0xea000004                          // b             7410 <sk_clut_3D_vfp4+0x508>
   .long  0xe320f000                          // nop           {0}
   .long  0x3f7ff972                          // .word         0x3f7ff972
   .long  0x3f7ff972                          // .word         0x3f7ff972
@@ -8759,7 +8714,7 @@
   .long  0xf2802051                          // vmov.i32      q1, #1
   .long  0xf22e29e0                          // vmla.i32      q1, q15, q8
   .long  0xedd20a00                          // vldr          s1, [r2]
-  .long  0xea000004                          // b             7cb0 <sk_clut_4D_vfp4+0x528>
+  .long  0xea000004                          // b             7c10 <sk_clut_4D_vfp4+0x528>
   .long  0xe320f000                          // nop           {0}
   .long  0x3f7ff972                          // .word         0x3f7ff972
   .long  0x3f7ff972                          // .word         0x3f7ff972
@@ -9483,6 +9438,51 @@
   .long  0x3f800000                          // .word         0x3f800000
   .long  0x3f800000                          // .word         0x3f800000
   .long  0x3f800000                          // .word         0x3f800000
+
+HIDDEN _sk_gauss_a_to_rgba_vfp4
+.globl _sk_gauss_a_to_rgba_vfp4
+FUNCTION(_sk_gauss_a_to_rgba_vfp4)
+_sk_gauss_a_to_rgba_vfp4:
+  .long  0xe28f2048                          // add           r2, pc, #72
+  .long  0xf4620acf                          // vld1.64       {d16-d17}, [r2]
+  .long  0xe28f2050                          // add           r2, pc, #80
+  .long  0xf4622acf                          // vld1.64       {d18-d19}, [r2]
+  .long  0xe28f2058                          // add           r2, pc, #88
+  .long  0xf2462c70                          // vfma.f32      q9, q3, q8
+  .long  0xf4620acf                          // vld1.64       {d16-d17}, [r2]
+  .long  0xe28f205c                          // add           r2, pc, #92
+  .long  0xf2460c72                          // vfma.f32      q8, q3, q9
+  .long  0xf4622acf                          // vld1.64       {d18-d19}, [r2]
+  .long  0xe28f2060                          // add           r2, pc, #96
+  .long  0xf4220acf                          // vld1.64       {d0-d1}, [r2]
+  .long  0xe4912004                          // ldr           r2, [r1], #4
+  .long  0xf2462c70                          // vfma.f32      q9, q3, q8
+  .long  0xf2060c72                          // vfma.f32      q0, q3, q9
+  .long  0xf2202150                          // vorr          q1, q0, q0
+  .long  0xf2204150                          // vorr          q2, q0, q0
+  .long  0xf2206150                          // vorr          q3, q0, q0
+  .long  0xe12fff12                          // bx            r2
+  .long  0xe320f000                          // nop           {0}
+  .long  0xc011102d                          // .word         0xc011102d
+  .long  0xc011102d                          // .word         0xc011102d
+  .long  0xc011102d                          // .word         0xc011102d
+  .long  0xc011102d                          // .word         0xc011102d
+  .long  0x40397812                          // .word         0x40397812
+  .long  0x40397812                          // .word         0x40397812
+  .long  0x40397812                          // .word         0x40397812
+  .long  0x40397812                          // .word         0x40397812
+  .long  0x3e5a9420                          // .word         0x3e5a9420
+  .long  0x3e5a9420                          // .word         0x3e5a9420
+  .long  0x3e5a9420                          // .word         0x3e5a9420
+  .long  0x3e5a9420                          // .word         0x3e5a9420
+  .long  0x3e1e9d04                          // .word         0x3e1e9d04
+  .long  0x3e1e9d04                          // .word         0x3e1e9d04
+  .long  0x3e1e9d04                          // .word         0x3e1e9d04
+  .long  0x3e1e9d04                          // .word         0x3e1e9d04
+  .long  0x39a11800                          // .word         0x39a11800
+  .long  0x39a11800                          // .word         0x39a11800
+  .long  0x39a11800                          // .word         0x39a11800
+  .long  0x39a11800                          // .word         0x39a11800
 #elif defined(__x86_64__)
 BALIGN32
 
@@ -15465,21 +15465,6 @@
   .byte  197,124,41,192                      // vmovaps       %ymm8,%ymm0
   .byte  255,224                             // jmpq          *%rax
 
-HIDDEN _sk_gauss_a_to_rgba_skx
-.globl _sk_gauss_a_to_rgba_skx
-FUNCTION(_sk_gauss_a_to_rgba_skx)
-_sk_gauss_a_to_rgba_skx:
-  .byte  196,226,125,24,5,27,247,2,0         // vbroadcastss  0x2f71b(%rip),%ymm0        # 356e4 <_sk_srcover_bgra_8888_sse2_lowp+0x3dc>
-  .byte  98,242,101,56,168,5,21,247,2,0      // vfmadd213ps   0x2f715(%rip){1to8},%ymm3,%ymm0        # 356e8 <_sk_srcover_bgra_8888_sse2_lowp+0x3e0>
-  .byte  98,242,101,56,168,5,15,247,2,0      // vfmadd213ps   0x2f70f(%rip){1to8},%ymm3,%ymm0        # 356ec <_sk_srcover_bgra_8888_sse2_lowp+0x3e4>
-  .byte  98,242,101,56,168,5,9,247,2,0       // vfmadd213ps   0x2f709(%rip){1to8},%ymm3,%ymm0        # 356f0 <_sk_srcover_bgra_8888_sse2_lowp+0x3e8>
-  .byte  98,242,101,56,168,5,3,247,2,0       // vfmadd213ps   0x2f703(%rip){1to8},%ymm3,%ymm0        # 356f4 <_sk_srcover_bgra_8888_sse2_lowp+0x3ec>
-  .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
-  .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
-  .byte  197,252,40,216                      // vmovaps       %ymm0,%ymm3
-  .byte  255,224                             // jmpq          *%rax
-
 HIDDEN _sk_gradient_skx
 .globl _sk_gradient_skx
 FUNCTION(_sk_gradient_skx)
@@ -15487,11 +15472,11 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  73,131,248,1                        // cmp           $0x1,%r8
-  .byte  15,134,229,0,0,0                    // jbe           60f5 <_sk_gradient_skx+0xf4>
+  .byte  15,134,229,0,0,0                    // jbe           60b4 <_sk_gradient_skx+0xf4>
   .byte  76,139,72,72                        // mov           0x48(%rax),%r9
   .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
   .byte  65,186,1,0,0,0                      // mov           $0x1,%r10d
-  .byte  196,226,125,24,21,149,245,2,0       // vbroadcastss  0x2f595(%rip),%ymm2        # 355bc <_sk_srcover_bgra_8888_sse2_lowp+0x2b4>
+  .byte  196,226,125,24,21,214,245,2,0       // vbroadcastss  0x2f5d6(%rip),%ymm2        # 355bc <_sk_srcover_bgra_8888_sse2_lowp+0x2b4>
   .byte  196,65,53,239,201                   // vpxor         %ymm9,%ymm9,%ymm9
   .byte  196,130,125,24,28,145               // vbroadcastss  (%r9,%r10,4),%ymm3
   .byte  98,241,100,40,194,192,2             // vcmpleps      %ymm0,%ymm3,%k0
@@ -15500,10 +15485,10 @@
   .byte  197,53,254,203                      // vpaddd        %ymm3,%ymm9,%ymm9
   .byte  73,131,194,1                        // add           $0x1,%r10
   .byte  77,57,208                           // cmp           %r10,%r8
-  .byte  117,218                             // jne           602c <_sk_gradient_skx+0x2b>
+  .byte  117,218                             // jne           5feb <_sk_gradient_skx+0x2b>
   .byte  76,139,72,8                         // mov           0x8(%rax),%r9
   .byte  73,131,248,8                        // cmp           $0x8,%r8
-  .byte  15,134,158,0,0,0                    // jbe           60fe <_sk_gradient_skx+0xfd>
+  .byte  15,134,158,0,0,0                    // jbe           60bd <_sk_gradient_skx+0xfd>
   .byte  197,245,118,201                     // vpcmpeqd      %ymm1,%ymm1,%ymm1
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,2,117,146,4,137                 // vgatherdps    %ymm1,(%r9,%ymm9,4),%ymm8
@@ -15535,7 +15520,7 @@
   .byte  196,65,20,87,237                    // vxorps        %ymm13,%ymm13,%ymm13
   .byte  72,139,64,64                        // mov           0x40(%rax),%rax
   .byte  196,34,13,146,44,136                // vgatherdps    %ymm14,(%rax,%ymm9,4),%ymm13
-  .byte  235,77                              // jmp           6142 <_sk_gradient_skx+0x141>
+  .byte  235,77                              // jmp           6101 <_sk_gradient_skx+0x141>
   .byte  76,139,72,8                         // mov           0x8(%rax),%r9
   .byte  196,65,52,87,201                    // vxorps        %ymm9,%ymm9,%ymm9
   .byte  196,66,53,22,1                      // vpermps       (%r9),%ymm9,%ymm8
@@ -15591,24 +15576,24 @@
   .byte  196,65,52,95,226                    // vmaxps        %ymm10,%ymm9,%ymm12
   .byte  196,65,36,94,220                    // vdivps        %ymm12,%ymm11,%ymm11
   .byte  196,65,36,89,227                    // vmulps        %ymm11,%ymm11,%ymm12
-  .byte  196,98,125,24,45,43,245,2,0         // vbroadcastss  0x2f52b(%rip),%ymm13        # 356f8 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
-  .byte  98,114,29,56,168,45,37,245,2,0      // vfmadd213ps   0x2f525(%rip){1to8},%ymm12,%ymm13        # 356fc <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
-  .byte  98,114,29,56,168,45,31,245,2,0      // vfmadd213ps   0x2f51f(%rip){1to8},%ymm12,%ymm13        # 35700 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
-  .byte  98,114,29,56,168,45,25,245,2,0      // vfmadd213ps   0x2f519(%rip){1to8},%ymm12,%ymm13        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
+  .byte  196,98,125,24,45,88,245,2,0         // vbroadcastss  0x2f558(%rip),%ymm13        # 356e4 <_sk_srcover_bgra_8888_sse2_lowp+0x3dc>
+  .byte  98,114,29,56,168,45,82,245,2,0      // vfmadd213ps   0x2f552(%rip){1to8},%ymm12,%ymm13        # 356e8 <_sk_srcover_bgra_8888_sse2_lowp+0x3e0>
+  .byte  98,114,29,56,168,45,76,245,2,0      // vfmadd213ps   0x2f54c(%rip){1to8},%ymm12,%ymm13        # 356ec <_sk_srcover_bgra_8888_sse2_lowp+0x3e4>
+  .byte  98,114,29,56,168,45,70,245,2,0      // vfmadd213ps   0x2f546(%rip){1to8},%ymm12,%ymm13        # 356f0 <_sk_srcover_bgra_8888_sse2_lowp+0x3e8>
   .byte  196,65,36,89,221                    // vmulps        %ymm13,%ymm11,%ymm11
   .byte  98,209,52,40,194,194,1              // vcmpltps      %ymm10,%ymm9,%k0
   .byte  98,114,126,40,56,200                // vpmovm2d      %k0,%ymm9
-  .byte  196,98,125,24,21,2,245,2,0          // vbroadcastss  0x2f502(%rip),%ymm10        # 35708 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
+  .byte  196,98,125,24,21,47,245,2,0         // vbroadcastss  0x2f52f(%rip),%ymm10        # 356f4 <_sk_srcover_bgra_8888_sse2_lowp+0x3ec>
   .byte  196,65,44,92,211                    // vsubps        %ymm11,%ymm10,%ymm10
   .byte  196,67,37,74,202,144                // vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   .byte  98,209,124,40,194,192,1             // vcmpltps      %ymm8,%ymm0,%k0
   .byte  98,242,126,40,56,192                // vpmovm2d      %k0,%ymm0
-  .byte  196,98,125,24,21,141,243,2,0        // vbroadcastss  0x2f38d(%rip),%ymm10        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,21,206,243,2,0        // vbroadcastss  0x2f3ce(%rip),%ymm10        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,65,44,92,209                    // vsubps        %ymm9,%ymm10,%ymm10
   .byte  196,195,53,74,194,0                 // vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   .byte  98,209,116,40,194,192,1             // vcmpltps      %ymm8,%ymm1,%k0
   .byte  98,114,126,40,56,200                // vpmovm2d      %k0,%ymm9
-  .byte  196,98,125,24,21,112,243,2,0        // vbroadcastss  0x2f370(%rip),%ymm10        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,21,177,243,2,0        // vbroadcastss  0x2f3b1(%rip),%ymm10        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,44,92,208                       // vsubps        %ymm0,%ymm10,%ymm10
   .byte  196,195,125,74,194,144              // vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   .byte  98,209,124,40,194,192,3             // vcmpunordps   %ymm8,%ymm0,%k0
@@ -15637,20 +15622,20 @@
   .byte  197,50,89,80,76                     // vmulss        0x4c(%rax),%xmm9,%xmm10
   .byte  196,66,125,24,210                   // vbroadcastss  %xmm10,%ymm10
   .byte  197,44,88,208                       // vaddps        %ymm0,%ymm10,%ymm10
-  .byte  98,113,44,56,89,21,108,244,2,0      // vmulps        0x2f46c(%rip){1to8},%ymm10,%ymm10        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  .byte  98,113,44,56,89,21,153,244,2,0      // vmulps        0x2f499(%rip){1to8},%ymm10,%ymm10        # 356f8 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   .byte  197,116,89,217                      // vmulps        %ymm1,%ymm1,%ymm11
   .byte  196,98,125,184,216                  // vfmadd231ps   %ymm0,%ymm0,%ymm11
   .byte  196,193,50,89,193                   // vmulss        %xmm9,%xmm9,%xmm0
   .byte  196,226,125,24,192                  // vbroadcastss  %xmm0,%ymm0
   .byte  197,164,92,192                      // vsubps        %ymm0,%ymm11,%ymm0
-  .byte  98,113,60,56,89,5,79,244,2,0        // vmulps        0x2f44f(%rip){1to8},%ymm8,%ymm8        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  .byte  98,113,60,56,89,5,124,244,2,0       // vmulps        0x2f47c(%rip){1to8},%ymm8,%ymm8        # 356fc <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  196,194,45,184,194                  // vfmadd231ps   %ymm10,%ymm10,%ymm0
   .byte  197,252,81,192                      // vsqrtps       %ymm0,%ymm0
   .byte  196,98,125,24,64,68                 // vbroadcastss  0x44(%rax),%ymm8
-  .byte  98,113,44,56,87,13,54,244,2,0       // vxorps        0x2f436(%rip){1to8},%ymm10,%ymm9        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  98,113,44,56,87,13,99,244,2,0       // vxorps        0x2f463(%rip){1to8},%ymm10,%ymm9        # 35700 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   .byte  196,65,124,92,210                   // vsubps        %ymm10,%ymm0,%ymm10
-  .byte  98,113,60,56,89,5,199,242,2,0       // vmulps        0x2f2c7(%rip){1to8},%ymm8,%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  98,113,60,56,89,5,8,243,2,0         // vmulps        0x2f308(%rip){1to8},%ymm8,%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,65,44,89,208                    // vmulps        %ymm8,%ymm10,%ymm10
   .byte  197,180,92,192                      // vsubps        %ymm0,%ymm9,%ymm0
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
@@ -15668,20 +15653,20 @@
   .byte  197,50,89,80,76                     // vmulss        0x4c(%rax),%xmm9,%xmm10
   .byte  196,66,125,24,210                   // vbroadcastss  %xmm10,%ymm10
   .byte  197,44,88,208                       // vaddps        %ymm0,%ymm10,%ymm10
-  .byte  98,113,44,56,89,21,228,243,2,0      // vmulps        0x2f3e4(%rip){1to8},%ymm10,%ymm10        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  .byte  98,113,44,56,89,21,17,244,2,0       // vmulps        0x2f411(%rip){1to8},%ymm10,%ymm10        # 356f8 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   .byte  197,116,89,217                      // vmulps        %ymm1,%ymm1,%ymm11
   .byte  196,98,125,184,216                  // vfmadd231ps   %ymm0,%ymm0,%ymm11
   .byte  196,193,50,89,193                   // vmulss        %xmm9,%xmm9,%xmm0
   .byte  196,226,125,24,192                  // vbroadcastss  %xmm0,%ymm0
   .byte  197,164,92,192                      // vsubps        %ymm0,%ymm11,%ymm0
-  .byte  98,113,60,56,89,5,199,243,2,0       // vmulps        0x2f3c7(%rip){1to8},%ymm8,%ymm8        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  .byte  98,113,60,56,89,5,244,243,2,0       // vmulps        0x2f3f4(%rip){1to8},%ymm8,%ymm8        # 356fc <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  196,194,45,184,194                  // vfmadd231ps   %ymm10,%ymm10,%ymm0
   .byte  197,252,81,192                      // vsqrtps       %ymm0,%ymm0
   .byte  196,98,125,24,64,68                 // vbroadcastss  0x44(%rax),%ymm8
-  .byte  98,113,44,56,87,13,174,243,2,0      // vxorps        0x2f3ae(%rip){1to8},%ymm10,%ymm9        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  98,113,44,56,87,13,219,243,2,0      // vxorps        0x2f3db(%rip){1to8},%ymm10,%ymm9        # 35700 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   .byte  196,65,124,92,210                   // vsubps        %ymm10,%ymm0,%ymm10
-  .byte  98,113,60,56,89,5,63,242,2,0        // vmulps        0x2f23f(%rip){1to8},%ymm8,%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  98,113,60,56,89,5,128,242,2,0       // vmulps        0x2f280(%rip){1to8},%ymm8,%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,65,44,89,208                    // vmulps        %ymm8,%ymm10,%ymm10
   .byte  197,180,92,192                      // vsubps        %ymm0,%ymm9,%ymm0
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
@@ -15698,13 +15683,13 @@
   .byte  197,58,89,72,76                     // vmulss        0x4c(%rax),%xmm8,%xmm9
   .byte  196,66,125,24,201                   // vbroadcastss  %xmm9,%ymm9
   .byte  197,52,88,200                       // vaddps        %ymm0,%ymm9,%ymm9
-  .byte  98,113,52,56,89,13,98,243,2,0       // vmulps        0x2f362(%rip){1to8},%ymm9,%ymm9        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  .byte  98,113,52,56,89,13,143,243,2,0      // vmulps        0x2f38f(%rip){1to8},%ymm9,%ymm9        # 356f8 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   .byte  197,116,89,209                      // vmulps        %ymm1,%ymm1,%ymm10
   .byte  196,98,125,184,208                  // vfmadd231ps   %ymm0,%ymm0,%ymm10
   .byte  196,193,58,89,192                   // vmulss        %xmm8,%xmm8,%xmm0
   .byte  196,226,125,24,192                  // vbroadcastss  %xmm0,%ymm0
   .byte  197,172,92,192                      // vsubps        %ymm0,%ymm10,%ymm0
-  .byte  98,241,124,56,87,5,73,243,2,0       // vxorps        0x2f349(%rip){1to8},%ymm0,%ymm0        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  98,241,124,56,87,5,118,243,2,0      // vxorps        0x2f376(%rip){1to8},%ymm0,%ymm0        # 35700 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   .byte  196,193,124,94,193                  // vdivps        %ymm9,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15746,7 +15731,7 @@
 FUNCTION(_sk_save_xy_skx)
 _sk_save_xy_skx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,118,241,2,0         // vbroadcastss  0x2f176(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,5,183,241,2,0         // vbroadcastss  0x2f1b7(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,65,124,88,200                   // vaddps        %ymm8,%ymm0,%ymm9
   .byte  196,67,125,8,209,1                  // vroundps      $0x1,%ymm9,%ymm10
   .byte  196,65,52,92,202                    // vsubps        %ymm10,%ymm9,%ymm9
@@ -15781,8 +15766,8 @@
 _sk_bilinear_nx_skx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,16,0                        // vmovups       (%rax),%ymm0
-  .byte  98,241,124,56,88,5,95,242,2,0       // vaddps        0x2f25f(%rip){1to8},%ymm0,%ymm0        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
-  .byte  196,98,125,24,5,246,240,2,0         // vbroadcastss  0x2f0f6(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  98,241,124,56,88,5,140,242,2,0      // vaddps        0x2f28c(%rip){1to8},%ymm0,%ymm0        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
+  .byte  196,98,125,24,5,55,241,2,0          // vbroadcastss  0x2f137(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,128,0,0,0             // vsubps        0x80(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -15795,7 +15780,7 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,16,0                        // vmovups       (%rax),%ymm0
   .byte  197,124,16,128,128,0,0,0            // vmovups       0x80(%rax),%ymm8
-  .byte  98,241,124,56,88,5,198,240,2,0      // vaddps        0x2f0c6(%rip){1to8},%ymm0,%ymm0        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  98,241,124,56,88,5,7,241,2,0        // vaddps        0x2f107(%rip){1to8},%ymm0,%ymm0        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15806,8 +15791,8 @@
 _sk_bilinear_ny_skx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,16,72,64                    // vmovups       0x40(%rax),%ymm1
-  .byte  98,241,116,56,88,13,13,242,2,0      // vaddps        0x2f20d(%rip){1to8},%ymm1,%ymm1        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
-  .byte  196,98,125,24,5,164,240,2,0         // vbroadcastss  0x2f0a4(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  98,241,116,56,88,13,58,242,2,0      // vaddps        0x2f23a(%rip){1to8},%ymm1,%ymm1        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
+  .byte  196,98,125,24,5,229,240,2,0         // vbroadcastss  0x2f0e5(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,192,0,0,0             // vsubps        0xc0(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -15820,7 +15805,7 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,16,72,64                    // vmovups       0x40(%rax),%ymm1
   .byte  197,124,16,128,192,0,0,0            // vmovups       0xc0(%rax),%ymm8
-  .byte  98,241,116,56,88,13,115,240,2,0     // vaddps        0x2f073(%rip){1to8},%ymm1,%ymm1        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  98,241,116,56,88,13,180,240,2,0     // vaddps        0x2f0b4(%rip){1to8},%ymm1,%ymm1        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15831,12 +15816,12 @@
 _sk_bicubic_n3x_skx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,16,0                        // vmovups       (%rax),%ymm0
-  .byte  98,241,124,56,88,5,191,241,2,0      // vaddps        0x2f1bf(%rip){1to8},%ymm0,%ymm0        # 3571c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
-  .byte  196,98,125,24,5,82,240,2,0          // vbroadcastss  0x2f052(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  98,241,124,56,88,5,236,241,2,0      // vaddps        0x2f1ec(%rip){1to8},%ymm0,%ymm0        # 35708 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
+  .byte  196,98,125,24,5,147,240,2,0         // vbroadcastss  0x2f093(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,128,0,0,0             // vsubps        0x80(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,164,241,2,0        // vbroadcastss  0x2f1a4(%rip),%ymm10        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
-  .byte  98,114,61,56,168,21,166,240,2,0     // vfmadd213ps   0x2f0a6(%rip){1to8},%ymm8,%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  .byte  196,98,125,24,21,209,241,2,0        // vbroadcastss  0x2f1d1(%rip),%ymm10        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  .byte  98,114,61,56,168,21,231,240,2,0     // vfmadd213ps   0x2f0e7(%rip){1to8},%ymm8,%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
   .byte  196,65,44,89,193                    // vmulps        %ymm9,%ymm10,%ymm8
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -15848,13 +15833,13 @@
 _sk_bicubic_n1x_skx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,16,0                        // vmovups       (%rax),%ymm0
-  .byte  98,241,124,56,88,5,113,241,2,0      // vaddps        0x2f171(%rip){1to8},%ymm0,%ymm0        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
-  .byte  196,98,125,24,5,8,240,2,0           // vbroadcastss  0x2f008(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  98,241,124,56,88,5,158,241,2,0      // vaddps        0x2f19e(%rip){1to8},%ymm0,%ymm0        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
+  .byte  196,98,125,24,5,73,240,2,0          // vbroadcastss  0x2f049(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,128,0,0,0             // vsubps        0x80(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,99,241,2,0         // vbroadcastss  0x2f163(%rip),%ymm9        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
-  .byte  98,114,61,56,168,13,93,241,2,0      // vfmadd213ps   0x2f15d(%rip){1to8},%ymm8,%ymm9        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
-  .byte  98,114,61,56,168,13,223,239,2,0     // vfmadd213ps   0x2efdf(%rip){1to8},%ymm8,%ymm9        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
-  .byte  98,114,61,56,168,13,77,241,2,0      // vfmadd213ps   0x2f14d(%rip){1to8},%ymm8,%ymm9        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  196,98,125,24,13,144,241,2,0        // vbroadcastss  0x2f190(%rip),%ymm9        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  .byte  98,114,61,56,168,13,138,241,2,0     // vfmadd213ps   0x2f18a(%rip){1to8},%ymm8,%ymm9        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  98,114,61,56,168,13,32,240,2,0      // vfmadd213ps   0x2f020(%rip){1to8},%ymm8,%ymm9        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  98,114,61,56,168,13,122,241,2,0     // vfmadd213ps   0x2f17a(%rip){1to8},%ymm8,%ymm9        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   .byte  197,124,17,136,0,1,0,0              // vmovups       %ymm9,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15864,13 +15849,13 @@
 FUNCTION(_sk_bicubic_p1x_skx)
 _sk_bicubic_p1x_skx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,190,239,2,0         // vbroadcastss  0x2efbe(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,5,255,239,2,0         // vbroadcastss  0x2efff(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  197,188,88,0                        // vaddps        (%rax),%ymm8,%ymm0
   .byte  197,124,16,136,128,0,0,0            // vmovups       0x80(%rax),%ymm9
-  .byte  196,98,125,24,21,25,241,2,0         // vbroadcastss  0x2f119(%rip),%ymm10        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
-  .byte  98,114,53,56,168,21,19,241,2,0      // vfmadd213ps   0x2f113(%rip){1to8},%ymm9,%ymm10        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  196,98,125,24,21,70,241,2,0         // vbroadcastss  0x2f146(%rip),%ymm10        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  .byte  98,114,53,56,168,21,64,241,2,0      // vfmadd213ps   0x2f140(%rip){1to8},%ymm9,%ymm10        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   .byte  196,66,53,168,208                   // vfmadd213ps   %ymm8,%ymm9,%ymm10
-  .byte  98,114,53,56,168,21,8,241,2,0       // vfmadd213ps   0x2f108(%rip){1to8},%ymm9,%ymm10        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  98,114,53,56,168,21,53,241,2,0      // vfmadd213ps   0x2f135(%rip){1to8},%ymm9,%ymm10        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   .byte  197,124,17,144,0,1,0,0              // vmovups       %ymm10,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15882,10 +15867,10 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,16,0                        // vmovups       (%rax),%ymm0
   .byte  197,124,16,128,128,0,0,0            // vmovups       0x80(%rax),%ymm8
-  .byte  98,241,124,56,88,5,224,240,2,0      // vaddps        0x2f0e0(%rip){1to8},%ymm0,%ymm0        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  98,241,124,56,88,5,13,241,2,0       // vaddps        0x2f10d(%rip){1to8},%ymm0,%ymm0        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,202,240,2,0        // vbroadcastss  0x2f0ca(%rip),%ymm10        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
-  .byte  98,114,61,56,168,21,204,239,2,0     // vfmadd213ps   0x2efcc(%rip){1to8},%ymm8,%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  .byte  196,98,125,24,21,247,240,2,0        // vbroadcastss  0x2f0f7(%rip),%ymm10        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  .byte  98,114,61,56,168,21,13,240,2,0      // vfmadd213ps   0x2f00d(%rip){1to8},%ymm8,%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
   .byte  196,65,52,89,194                    // vmulps        %ymm10,%ymm9,%ymm8
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -15897,12 +15882,12 @@
 _sk_bicubic_n3y_skx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,16,72,64                    // vmovups       0x40(%rax),%ymm1
-  .byte  98,241,116,56,88,13,154,240,2,0     // vaddps        0x2f09a(%rip){1to8},%ymm1,%ymm1        # 3571c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
-  .byte  196,98,125,24,5,45,239,2,0          // vbroadcastss  0x2ef2d(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  98,241,116,56,88,13,199,240,2,0     // vaddps        0x2f0c7(%rip){1to8},%ymm1,%ymm1        # 35708 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
+  .byte  196,98,125,24,5,110,239,2,0         // vbroadcastss  0x2ef6e(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,192,0,0,0             // vsubps        0xc0(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,127,240,2,0        // vbroadcastss  0x2f07f(%rip),%ymm10        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
-  .byte  98,114,61,56,168,21,129,239,2,0     // vfmadd213ps   0x2ef81(%rip){1to8},%ymm8,%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  .byte  196,98,125,24,21,172,240,2,0        // vbroadcastss  0x2f0ac(%rip),%ymm10        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  .byte  98,114,61,56,168,21,194,239,2,0     // vfmadd213ps   0x2efc2(%rip){1to8},%ymm8,%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
   .byte  196,65,44,89,193                    // vmulps        %ymm9,%ymm10,%ymm8
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -15914,13 +15899,13 @@
 _sk_bicubic_n1y_skx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,16,72,64                    // vmovups       0x40(%rax),%ymm1
-  .byte  98,241,116,56,88,13,75,240,2,0      // vaddps        0x2f04b(%rip){1to8},%ymm1,%ymm1        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
-  .byte  196,98,125,24,5,226,238,2,0         // vbroadcastss  0x2eee2(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  98,241,116,56,88,13,120,240,2,0     // vaddps        0x2f078(%rip){1to8},%ymm1,%ymm1        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
+  .byte  196,98,125,24,5,35,239,2,0          // vbroadcastss  0x2ef23(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,192,0,0,0             // vsubps        0xc0(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,61,240,2,0         // vbroadcastss  0x2f03d(%rip),%ymm9        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
-  .byte  98,114,61,56,168,13,55,240,2,0      // vfmadd213ps   0x2f037(%rip){1to8},%ymm8,%ymm9        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
-  .byte  98,114,61,56,168,13,185,238,2,0     // vfmadd213ps   0x2eeb9(%rip){1to8},%ymm8,%ymm9        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
-  .byte  98,114,61,56,168,13,39,240,2,0      // vfmadd213ps   0x2f027(%rip){1to8},%ymm8,%ymm9        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  196,98,125,24,13,106,240,2,0        // vbroadcastss  0x2f06a(%rip),%ymm9        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  .byte  98,114,61,56,168,13,100,240,2,0     // vfmadd213ps   0x2f064(%rip){1to8},%ymm8,%ymm9        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  98,114,61,56,168,13,250,238,2,0     // vfmadd213ps   0x2eefa(%rip){1to8},%ymm8,%ymm9        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  98,114,61,56,168,13,84,240,2,0      // vfmadd213ps   0x2f054(%rip){1to8},%ymm8,%ymm9        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   .byte  197,124,17,136,64,1,0,0             // vmovups       %ymm9,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15930,13 +15915,13 @@
 FUNCTION(_sk_bicubic_p1y_skx)
 _sk_bicubic_p1y_skx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,152,238,2,0         // vbroadcastss  0x2ee98(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,5,217,238,2,0         // vbroadcastss  0x2eed9(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  197,188,88,72,64                    // vaddps        0x40(%rax),%ymm8,%ymm1
   .byte  197,124,16,136,192,0,0,0            // vmovups       0xc0(%rax),%ymm9
-  .byte  196,98,125,24,21,242,239,2,0        // vbroadcastss  0x2eff2(%rip),%ymm10        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
-  .byte  98,114,53,56,168,21,236,239,2,0     // vfmadd213ps   0x2efec(%rip){1to8},%ymm9,%ymm10        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  196,98,125,24,21,31,240,2,0         // vbroadcastss  0x2f01f(%rip),%ymm10        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  .byte  98,114,53,56,168,21,25,240,2,0      // vfmadd213ps   0x2f019(%rip){1to8},%ymm9,%ymm10        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   .byte  196,66,53,168,208                   // vfmadd213ps   %ymm8,%ymm9,%ymm10
-  .byte  98,114,53,56,168,21,225,239,2,0     // vfmadd213ps   0x2efe1(%rip){1to8},%ymm9,%ymm10        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  98,114,53,56,168,21,14,240,2,0      // vfmadd213ps   0x2f00e(%rip){1to8},%ymm9,%ymm10        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   .byte  197,124,17,144,64,1,0,0             // vmovups       %ymm10,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15948,10 +15933,10 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,16,72,64                    // vmovups       0x40(%rax),%ymm1
   .byte  197,124,16,128,192,0,0,0            // vmovups       0xc0(%rax),%ymm8
-  .byte  98,241,116,56,88,13,184,239,2,0     // vaddps        0x2efb8(%rip){1to8},%ymm1,%ymm1        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  98,241,116,56,88,13,229,239,2,0     // vaddps        0x2efe5(%rip){1to8},%ymm1,%ymm1        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,162,239,2,0        // vbroadcastss  0x2efa2(%rip),%ymm10        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
-  .byte  98,114,61,56,168,21,164,238,2,0     // vfmadd213ps   0x2eea4(%rip){1to8},%ymm8,%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  .byte  196,98,125,24,21,207,239,2,0        // vbroadcastss  0x2efcf(%rip),%ymm10        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  .byte  98,114,61,56,168,21,229,238,2,0     // vfmadd213ps   0x2eee5(%rip){1to8},%ymm8,%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
   .byte  196,65,52,89,194                    // vmulps        %ymm10,%ymm9,%ymm8
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -16077,7 +16062,7 @@
   .byte  98,226,61,40,64,224                 // vpmulld       %ymm0,%ymm8,%ymm20
   .byte  98,209,93,32,254,193                // vpaddd        %ymm9,%ymm20,%ymm0
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  98,98,125,40,88,29,172,237,2,0      // vpbroadcastd  0x2edac(%rip),%ymm27        # 35734 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  .byte  98,98,125,40,88,29,217,237,2,0      // vpbroadcastd  0x2edd9(%rip),%ymm27        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
   .byte  98,146,125,40,64,195                // vpmulld       %ymm27,%ymm0,%ymm0
   .byte  196,65,45,239,210                   // vpxor         %ymm10,%ymm10,%ymm10
   .byte  197,245,118,201                     // vpcmpeqd      %ymm1,%ymm1,%ymm1
@@ -16090,9 +16075,9 @@
   .byte  196,65,37,118,219                   // vpcmpeqd      %ymm11,%ymm11,%ymm11
   .byte  196,162,37,146,12,144               // vgatherdps    %ymm11,(%rax,%ymm10,4),%ymm1
   .byte  98,97,124,40,40,233                 // vmovaps       %ymm1,%ymm29
-  .byte  196,98,125,24,21,98,237,2,0         // vbroadcastss  0x2ed62(%rip),%ymm10        # 35730 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  .byte  196,98,125,24,21,143,237,2,0        // vbroadcastss  0x2ed8f(%rip),%ymm10        # 3571c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
   .byte  98,81,60,32,88,218                  // vaddps        %ymm10,%ymm24,%ymm11
-  .byte  98,226,125,40,88,5,230,235,2,0      // vpbroadcastd  0x2ebe6(%rip),%ymm16        # 355c4 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
+  .byte  98,226,125,40,88,5,39,236,2,0       // vpbroadcastd  0x2ec27(%rip),%ymm16        # 355c4 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
   .byte  98,177,125,40,254,192               // vpaddd        %ymm16,%ymm0,%ymm0
   .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
   .byte  196,65,29,118,228                   // vpcmpeqd      %ymm12,%ymm12,%ymm12
@@ -16332,7 +16317,7 @@
   .byte  98,162,45,40,64,229                 // vpmulld       %ymm21,%ymm10,%ymm20
   .byte  98,241,93,32,254,193                // vpaddd        %ymm1,%ymm20,%ymm0
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  98,226,125,40,88,5,62,232,2,0       // vpbroadcastd  0x2e83e(%rip),%ymm16        # 35734 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  .byte  98,226,125,40,88,5,107,232,2,0      // vpbroadcastd  0x2e86b(%rip),%ymm16        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
   .byte  98,50,125,40,64,216                 // vpmulld       %ymm16,%ymm0,%ymm11
   .byte  196,65,28,87,228                    // vxorps        %ymm12,%ymm12,%ymm12
   .byte  197,253,118,192                     // vpcmpeqd      %ymm0,%ymm0,%ymm0
@@ -16344,9 +16329,9 @@
   .byte  196,65,29,118,228                   // vpcmpeqd      %ymm12,%ymm12,%ymm12
   .byte  196,98,29,146,44,128                // vgatherdps    %ymm12,(%rax,%ymm0,4),%ymm13
   .byte  197,124,17,172,36,192,4,0,0         // vmovups       %ymm13,0x4c0(%rsp)
-  .byte  196,226,125,24,5,240,231,2,0        // vbroadcastss  0x2e7f0(%rip),%ymm0        # 35730 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  .byte  196,226,125,24,5,29,232,2,0         // vbroadcastss  0x2e81d(%rip),%ymm0        # 3571c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
   .byte  98,113,28,32,88,224                 // vaddps        %ymm0,%ymm28,%ymm12
-  .byte  98,226,125,40,88,13,116,230,2,0     // vpbroadcastd  0x2e674(%rip),%ymm17        # 355c4 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
+  .byte  98,226,125,40,88,13,181,230,2,0     // vpbroadcastd  0x2e6b5(%rip),%ymm17        # 355c4 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
   .byte  98,49,37,40,254,217                 // vpaddd        %ymm17,%ymm11,%ymm11
   .byte  197,236,87,210                      // vxorps        %ymm2,%ymm2,%ymm2
   .byte  196,65,21,118,237                   // vpcmpeqd      %ymm13,%ymm13,%ymm13
@@ -16751,13 +16736,28 @@
   .byte  197,228,92,214                      // vsubps        %ymm6,%ymm3,%ymm2
   .byte  196,226,93,168,214                  // vfmadd213ps   %ymm6,%ymm4,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,18,221,2,0        // vbroadcastss  0x2dd12(%rip),%ymm3        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,226,125,24,29,83,221,2,0        // vbroadcastss  0x2dd53(%rip),%ymm3        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  98,145,124,40,40,227                // vmovaps       %ymm27,%ymm4
   .byte  197,252,16,108,36,128               // vmovups       -0x80(%rsp),%ymm5
   .byte  197,252,16,116,36,160               // vmovups       -0x60(%rsp),%ymm6
   .byte  197,252,16,124,36,224               // vmovups       -0x20(%rsp),%ymm7
   .byte  72,129,196,88,5,0,0                 // add           $0x558,%rsp
   .byte  255,224                             // jmpq          *%rax
+
+HIDDEN _sk_gauss_a_to_rgba_skx
+.globl _sk_gauss_a_to_rgba_skx
+FUNCTION(_sk_gauss_a_to_rgba_skx)
+_sk_gauss_a_to_rgba_skx:
+  .byte  196,226,125,24,5,149,222,2,0        // vbroadcastss  0x2de95(%rip),%ymm0        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  .byte  98,242,101,56,168,5,143,222,2,0     // vfmadd213ps   0x2de8f(%rip){1to8},%ymm3,%ymm0        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  98,242,101,56,168,5,137,222,2,0     // vfmadd213ps   0x2de89(%rip){1to8},%ymm3,%ymm0        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  98,242,101,56,168,5,131,222,2,0     // vfmadd213ps   0x2de83(%rip){1to8},%ymm3,%ymm0        # 35730 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  .byte  98,242,101,56,168,5,125,222,2,0     // vfmadd213ps   0x2de7d(%rip){1to8},%ymm3,%ymm0        # 35734 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
+  .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
+  .byte  197,252,40,216                      // vmovaps       %ymm0,%ymm3
+  .byte  255,224                             // jmpq          *%rax
   .byte  144                                 // nop
 
 HIDDEN _sk_start_pipeline_hsw
@@ -22813,25 +22813,6 @@
   .byte  197,124,41,192                      // vmovaps       %ymm8,%ymm0
   .byte  255,224                             // jmpq          *%rax
 
-HIDDEN _sk_gauss_a_to_rgba_hsw
-.globl _sk_gauss_a_to_rgba_hsw
-FUNCTION(_sk_gauss_a_to_rgba_hsw)
-_sk_gauss_a_to_rgba_hsw:
-  .byte  196,226,125,24,5,126,127,2,0        // vbroadcastss  0x27f7e(%rip),%ymm0        # 356e8 <_sk_srcover_bgra_8888_sse2_lowp+0x3e0>
-  .byte  196,226,125,24,13,113,127,2,0       // vbroadcastss  0x27f71(%rip),%ymm1        # 356e4 <_sk_srcover_bgra_8888_sse2_lowp+0x3dc>
-  .byte  196,226,101,168,200                 // vfmadd213ps   %ymm0,%ymm3,%ymm1
-  .byte  196,226,125,24,5,107,127,2,0        // vbroadcastss  0x27f6b(%rip),%ymm0        # 356ec <_sk_srcover_bgra_8888_sse2_lowp+0x3e4>
-  .byte  196,226,101,184,193                 // vfmadd231ps   %ymm1,%ymm3,%ymm0
-  .byte  196,226,125,24,13,97,127,2,0        // vbroadcastss  0x27f61(%rip),%ymm1        # 356f0 <_sk_srcover_bgra_8888_sse2_lowp+0x3e8>
-  .byte  196,226,101,184,200                 // vfmadd231ps   %ymm0,%ymm3,%ymm1
-  .byte  196,226,125,24,5,87,127,2,0         // vbroadcastss  0x27f57(%rip),%ymm0        # 356f4 <_sk_srcover_bgra_8888_sse2_lowp+0x3ec>
-  .byte  196,226,101,184,193                 // vfmadd231ps   %ymm1,%ymm3,%ymm0
-  .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
-  .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
-  .byte  197,252,40,216                      // vmovaps       %ymm0,%ymm3
-  .byte  255,224                             // jmpq          *%rax
-
 HIDDEN _sk_gradient_hsw
 .globl _sk_gradient_hsw
 FUNCTION(_sk_gradient_hsw)
@@ -22839,11 +22820,11 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  73,131,248,1                        // cmp           $0x1,%r8
-  .byte  15,134,220,0,0,0                    // jbe           d89d <_sk_gradient_hsw+0xeb>
+  .byte  15,134,220,0,0,0                    // jbe           d84c <_sk_gradient_hsw+0xeb>
   .byte  76,139,72,72                        // mov           0x48(%rax),%r9
   .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
   .byte  65,186,1,0,0,0                      // mov           $0x1,%r10d
-  .byte  196,226,125,24,21,228,125,2,0       // vbroadcastss  0x27de4(%rip),%ymm2        # 355bc <_sk_srcover_bgra_8888_sse2_lowp+0x2b4>
+  .byte  196,226,125,24,21,53,126,2,0        // vbroadcastss  0x27e35(%rip),%ymm2        # 355bc <_sk_srcover_bgra_8888_sse2_lowp+0x2b4>
   .byte  196,65,53,239,201                   // vpxor         %ymm9,%ymm9,%ymm9
   .byte  196,130,125,24,28,145               // vbroadcastss  (%r9,%r10,4),%ymm3
   .byte  197,228,194,216,2                   // vcmpleps      %ymm0,%ymm3,%ymm3
@@ -22851,10 +22832,10 @@
   .byte  197,53,254,203                      // vpaddd        %ymm3,%ymm9,%ymm9
   .byte  73,255,194                          // inc           %r10
   .byte  77,57,208                           // cmp           %r10,%r8
-  .byte  117,227                             // jne           d7dd <_sk_gradient_hsw+0x2b>
+  .byte  117,227                             // jne           d78c <_sk_gradient_hsw+0x2b>
   .byte  76,139,72,8                         // mov           0x8(%rax),%r9
   .byte  73,131,248,8                        // cmp           $0x8,%r8
-  .byte  15,134,158,0,0,0                    // jbe           d8a6 <_sk_gradient_hsw+0xf4>
+  .byte  15,134,158,0,0,0                    // jbe           d855 <_sk_gradient_hsw+0xf4>
   .byte  196,65,13,118,246                   // vpcmpeqd      %ymm14,%ymm14,%ymm14
   .byte  196,65,36,87,219                    // vxorps        %ymm11,%ymm11,%ymm11
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
@@ -22886,7 +22867,7 @@
   .byte  196,130,5,146,28,136                // vgatherdps    %ymm15,(%r8,%ymm9,4),%ymm3
   .byte  72,139,64,64                        // mov           0x40(%rax),%rax
   .byte  196,34,13,146,28,136                // vgatherdps    %ymm14,(%rax,%ymm9,4),%ymm11
-  .byte  235,77                              // jmp           d8ea <_sk_gradient_hsw+0x138>
+  .byte  235,77                              // jmp           d899 <_sk_gradient_hsw+0x138>
   .byte  76,139,72,8                         // mov           0x8(%rax),%r9
   .byte  196,65,52,87,201                    // vxorps        %ymm9,%ymm9,%ymm9
   .byte  196,66,53,22,1                      // vpermps       (%r9),%ymm9,%ymm8
@@ -22946,24 +22927,24 @@
   .byte  196,65,52,95,226                    // vmaxps        %ymm10,%ymm9,%ymm12
   .byte  196,65,36,94,220                    // vdivps        %ymm12,%ymm11,%ymm11
   .byte  196,65,36,89,227                    // vmulps        %ymm11,%ymm11,%ymm12
-  .byte  196,98,125,24,45,115,125,2,0        // vbroadcastss  0x27d73(%rip),%ymm13        # 356f8 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
-  .byte  196,98,125,24,53,110,125,2,0        // vbroadcastss  0x27d6e(%rip),%ymm14        # 356fc <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
+  .byte  196,98,125,24,45,176,125,2,0        // vbroadcastss  0x27db0(%rip),%ymm13        # 356e4 <_sk_srcover_bgra_8888_sse2_lowp+0x3dc>
+  .byte  196,98,125,24,53,171,125,2,0        // vbroadcastss  0x27dab(%rip),%ymm14        # 356e8 <_sk_srcover_bgra_8888_sse2_lowp+0x3e0>
   .byte  196,66,29,184,245                   // vfmadd231ps   %ymm13,%ymm12,%ymm14
-  .byte  196,98,125,24,45,100,125,2,0        // vbroadcastss  0x27d64(%rip),%ymm13        # 35700 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
+  .byte  196,98,125,24,45,161,125,2,0        // vbroadcastss  0x27da1(%rip),%ymm13        # 356ec <_sk_srcover_bgra_8888_sse2_lowp+0x3e4>
   .byte  196,66,29,184,238                   // vfmadd231ps   %ymm14,%ymm12,%ymm13
-  .byte  196,98,125,24,53,90,125,2,0         // vbroadcastss  0x27d5a(%rip),%ymm14        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
+  .byte  196,98,125,24,53,151,125,2,0        // vbroadcastss  0x27d97(%rip),%ymm14        # 356f0 <_sk_srcover_bgra_8888_sse2_lowp+0x3e8>
   .byte  196,66,29,184,245                   // vfmadd231ps   %ymm13,%ymm12,%ymm14
   .byte  196,65,36,89,222                    // vmulps        %ymm14,%ymm11,%ymm11
   .byte  196,65,52,194,202,1                 // vcmpltps      %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,21,69,125,2,0         // vbroadcastss  0x27d45(%rip),%ymm10        # 35708 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
+  .byte  196,98,125,24,21,130,125,2,0        // vbroadcastss  0x27d82(%rip),%ymm10        # 356f4 <_sk_srcover_bgra_8888_sse2_lowp+0x3ec>
   .byte  196,65,44,92,211                    // vsubps        %ymm11,%ymm10,%ymm10
   .byte  196,67,37,74,202,144                // vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   .byte  196,193,124,194,192,1               // vcmpltps      %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,21,215,123,2,0        // vbroadcastss  0x27bd7(%rip),%ymm10        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,21,40,124,2,0         // vbroadcastss  0x27c28(%rip),%ymm10        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,65,44,92,209                    // vsubps        %ymm9,%ymm10,%ymm10
   .byte  196,195,53,74,194,0                 // vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   .byte  196,65,116,194,200,1                // vcmpltps      %ymm8,%ymm1,%ymm9
-  .byte  196,98,125,24,21,193,123,2,0        // vbroadcastss  0x27bc1(%rip),%ymm10        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,21,18,124,2,0         // vbroadcastss  0x27c12(%rip),%ymm10        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,44,92,208                       // vsubps        %ymm0,%ymm10,%ymm10
   .byte  196,195,125,74,194,144              // vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   .byte  196,65,124,194,200,3                // vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -22991,23 +22972,23 @@
   .byte  197,50,89,80,76                     // vmulss        0x4c(%rax),%xmm9,%xmm10
   .byte  196,66,125,24,210                   // vbroadcastss  %xmm10,%ymm10
   .byte  197,44,88,208                       // vaddps        %ymm0,%ymm10,%ymm10
-  .byte  196,98,125,24,29,197,124,2,0        // vbroadcastss  0x27cc5(%rip),%ymm11        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  .byte  196,98,125,24,29,2,125,2,0          // vbroadcastss  0x27d02(%rip),%ymm11        # 356f8 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   .byte  196,65,44,89,211                    // vmulps        %ymm11,%ymm10,%ymm10
   .byte  197,116,89,217                      // vmulps        %ymm1,%ymm1,%ymm11
   .byte  196,98,125,184,216                  // vfmadd231ps   %ymm0,%ymm0,%ymm11
   .byte  196,193,50,89,193                   // vmulss        %xmm9,%xmm9,%xmm0
   .byte  196,226,125,24,192                  // vbroadcastss  %xmm0,%ymm0
   .byte  197,164,92,192                      // vsubps        %ymm0,%ymm11,%ymm0
-  .byte  196,98,125,24,13,164,124,2,0        // vbroadcastss  0x27ca4(%rip),%ymm9        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  .byte  196,98,125,24,13,225,124,2,0        // vbroadcastss  0x27ce1(%rip),%ymm9        # 356fc <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  196,194,45,184,194                  // vfmadd231ps   %ymm10,%ymm10,%ymm0
   .byte  197,252,81,192                      // vsqrtps       %ymm0,%ymm0
   .byte  196,98,125,24,64,68                 // vbroadcastss  0x44(%rax),%ymm8
-  .byte  196,98,125,24,13,135,124,2,0        // vbroadcastss  0x27c87(%rip),%ymm9        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  196,98,125,24,13,196,124,2,0        // vbroadcastss  0x27cc4(%rip),%ymm9        # 35700 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   .byte  196,65,44,87,201                    // vxorps        %ymm9,%ymm10,%ymm9
   .byte  196,65,124,92,210                   // vsubps        %ymm10,%ymm0,%ymm10
-  .byte  196,98,125,24,29,20,123,2,0         // vbroadcastss  0x27b14(%rip),%ymm11        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,29,101,123,2,0        // vbroadcastss  0x27b65(%rip),%ymm11        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,65,60,89,195                    // vmulps        %ymm11,%ymm8,%ymm8
   .byte  196,65,44,89,208                    // vmulps        %ymm8,%ymm10,%ymm10
   .byte  197,180,92,192                      // vsubps        %ymm0,%ymm9,%ymm0
@@ -23026,23 +23007,23 @@
   .byte  197,50,89,80,76                     // vmulss        0x4c(%rax),%xmm9,%xmm10
   .byte  196,66,125,24,210                   // vbroadcastss  %xmm10,%ymm10
   .byte  197,44,88,208                       // vaddps        %ymm0,%ymm10,%ymm10
-  .byte  196,98,125,24,29,45,124,2,0         // vbroadcastss  0x27c2d(%rip),%ymm11        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  .byte  196,98,125,24,29,106,124,2,0        // vbroadcastss  0x27c6a(%rip),%ymm11        # 356f8 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   .byte  196,65,44,89,211                    // vmulps        %ymm11,%ymm10,%ymm10
   .byte  197,116,89,217                      // vmulps        %ymm1,%ymm1,%ymm11
   .byte  196,98,125,184,216                  // vfmadd231ps   %ymm0,%ymm0,%ymm11
   .byte  196,193,50,89,193                   // vmulss        %xmm9,%xmm9,%xmm0
   .byte  196,226,125,24,192                  // vbroadcastss  %xmm0,%ymm0
   .byte  197,164,92,192                      // vsubps        %ymm0,%ymm11,%ymm0
-  .byte  196,98,125,24,13,12,124,2,0         // vbroadcastss  0x27c0c(%rip),%ymm9        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  .byte  196,98,125,24,13,73,124,2,0         // vbroadcastss  0x27c49(%rip),%ymm9        # 356fc <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  196,194,45,184,194                  // vfmadd231ps   %ymm10,%ymm10,%ymm0
   .byte  197,252,81,192                      // vsqrtps       %ymm0,%ymm0
   .byte  196,98,125,24,64,68                 // vbroadcastss  0x44(%rax),%ymm8
-  .byte  196,98,125,24,13,239,123,2,0        // vbroadcastss  0x27bef(%rip),%ymm9        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  196,98,125,24,13,44,124,2,0         // vbroadcastss  0x27c2c(%rip),%ymm9        # 35700 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   .byte  196,65,44,87,201                    // vxorps        %ymm9,%ymm10,%ymm9
   .byte  196,65,124,92,210                   // vsubps        %ymm10,%ymm0,%ymm10
-  .byte  196,98,125,24,29,124,122,2,0        // vbroadcastss  0x27a7c(%rip),%ymm11        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,29,205,122,2,0        // vbroadcastss  0x27acd(%rip),%ymm11        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,65,60,89,195                    // vmulps        %ymm11,%ymm8,%ymm8
   .byte  196,65,44,89,208                    // vmulps        %ymm8,%ymm10,%ymm10
   .byte  197,180,92,192                      // vsubps        %ymm0,%ymm9,%ymm0
@@ -23060,14 +23041,14 @@
   .byte  197,58,89,72,76                     // vmulss        0x4c(%rax),%xmm8,%xmm9
   .byte  196,66,125,24,201                   // vbroadcastss  %xmm9,%ymm9
   .byte  197,52,88,200                       // vaddps        %ymm0,%ymm9,%ymm9
-  .byte  196,98,125,24,21,155,123,2,0        // vbroadcastss  0x27b9b(%rip),%ymm10        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  .byte  196,98,125,24,21,216,123,2,0        // vbroadcastss  0x27bd8(%rip),%ymm10        # 356f8 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   .byte  196,65,52,89,202                    // vmulps        %ymm10,%ymm9,%ymm9
   .byte  197,116,89,209                      // vmulps        %ymm1,%ymm1,%ymm10
   .byte  196,98,125,184,208                  // vfmadd231ps   %ymm0,%ymm0,%ymm10
   .byte  196,193,58,89,192                   // vmulss        %xmm8,%xmm8,%xmm0
   .byte  196,226,125,24,192                  // vbroadcastss  %xmm0,%ymm0
   .byte  197,172,92,192                      // vsubps        %ymm0,%ymm10,%ymm0
-  .byte  196,98,125,24,5,126,123,2,0         // vbroadcastss  0x27b7e(%rip),%ymm8        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  196,98,125,24,5,187,123,2,0         // vbroadcastss  0x27bbb(%rip),%ymm8        # 35700 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   .byte  196,193,124,87,192                  // vxorps        %ymm8,%ymm0,%ymm0
   .byte  196,193,124,94,193                  // vdivps        %ymm9,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -23110,7 +23091,7 @@
 FUNCTION(_sk_save_xy_hsw)
 _sk_save_xy_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,169,121,2,0         // vbroadcastss  0x279a9(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,5,250,121,2,0         // vbroadcastss  0x279fa(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,65,124,88,200                   // vaddps        %ymm8,%ymm0,%ymm9
   .byte  196,67,125,8,209,1                  // vroundps      $0x1,%ymm9,%ymm10
   .byte  196,65,52,92,202                    // vsubps        %ymm10,%ymm9,%ymm9
@@ -23144,9 +23125,9 @@
 FUNCTION(_sk_bilinear_nx_hsw)
 _sk_bilinear_nx_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,151,122,2,0        // vbroadcastss  0x27a97(%rip),%ymm0        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  .byte  196,226,125,24,5,212,122,2,0        // vbroadcastss  0x27ad4(%rip),%ymm0        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,42,121,2,0          // vbroadcastss  0x2792a(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,5,123,121,2,0         // vbroadcastss  0x2797b(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,128,0,0,0             // vsubps        0x80(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -23157,7 +23138,7 @@
 FUNCTION(_sk_bilinear_px_hsw)
 _sk_bilinear_px_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,7,121,2,0          // vbroadcastss  0x27907(%rip),%ymm0        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,226,125,24,5,88,121,2,0         // vbroadcastss  0x27958(%rip),%ymm0        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,128,128,0,0,0            // vmovups       0x80(%rax),%ymm8
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
@@ -23169,9 +23150,9 @@
 FUNCTION(_sk_bilinear_ny_hsw)
 _sk_bilinear_ny_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,72,122,2,0        // vbroadcastss  0x27a48(%rip),%ymm1        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  .byte  196,226,125,24,13,133,122,2,0       // vbroadcastss  0x27a85(%rip),%ymm1        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   .byte  197,244,88,72,64                    // vaddps        0x40(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,218,120,2,0         // vbroadcastss  0x278da(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,5,43,121,2,0          // vbroadcastss  0x2792b(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,192,0,0,0             // vsubps        0xc0(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -23182,7 +23163,7 @@
 FUNCTION(_sk_bilinear_py_hsw)
 _sk_bilinear_py_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,183,120,2,0       // vbroadcastss  0x278b7(%rip),%ymm1        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,226,125,24,13,8,121,2,0         // vbroadcastss  0x27908(%rip),%ymm1        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  197,244,88,72,64                    // vaddps        0x40(%rax),%ymm1,%ymm1
   .byte  197,124,16,128,192,0,0,0            // vmovups       0xc0(%rax),%ymm8
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
@@ -23194,13 +23175,13 @@
 FUNCTION(_sk_bicubic_n3x_hsw)
 _sk_bicubic_n3x_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,251,121,2,0        // vbroadcastss  0x279fb(%rip),%ymm0        # 3571c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
+  .byte  196,226,125,24,5,56,122,2,0         // vbroadcastss  0x27a38(%rip),%ymm0        # 35708 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,138,120,2,0         // vbroadcastss  0x2788a(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,5,219,120,2,0         // vbroadcastss  0x278db(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,128,0,0,0             // vsubps        0x80(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,232,120,2,0        // vbroadcastss  0x278e8(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
-  .byte  196,98,125,24,29,211,121,2,0        // vbroadcastss  0x279d3(%rip),%ymm11        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  .byte  196,98,125,24,21,57,121,2,0         // vbroadcastss  0x27939(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  .byte  196,98,125,24,29,16,122,2,0         // vbroadcastss  0x27a10(%rip),%ymm11        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   .byte  196,66,61,168,218                   // vfmadd213ps   %ymm10,%ymm8,%ymm11
   .byte  196,65,36,89,193                    // vmulps        %ymm9,%ymm11,%ymm8
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
@@ -23212,16 +23193,16 @@
 FUNCTION(_sk_bicubic_n1x_hsw)
 _sk_bicubic_n1x_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,170,121,2,0        // vbroadcastss  0x279aa(%rip),%ymm0        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  .byte  196,226,125,24,5,231,121,2,0        // vbroadcastss  0x279e7(%rip),%ymm0        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,61,120,2,0          // vbroadcastss  0x2783d(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,5,142,120,2,0         // vbroadcastss  0x2788e(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,128,0,0,0             // vsubps        0x80(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,156,121,2,0        // vbroadcastss  0x2799c(%rip),%ymm9        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
-  .byte  196,98,125,24,21,143,121,2,0        // vbroadcastss  0x2798f(%rip),%ymm10        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  .byte  196,98,125,24,13,217,121,2,0        // vbroadcastss  0x279d9(%rip),%ymm9        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  196,98,125,24,21,204,121,2,0        // vbroadcastss  0x279cc(%rip),%ymm10        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   .byte  196,66,61,168,209                   // vfmadd213ps   %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,13,17,120,2,0         // vbroadcastss  0x27811(%rip),%ymm9        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,13,98,120,2,0         // vbroadcastss  0x27862(%rip),%ymm9        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,66,61,184,202                   // vfmadd231ps   %ymm10,%ymm8,%ymm9
-  .byte  196,98,125,24,21,123,121,2,0        // vbroadcastss  0x2797b(%rip),%ymm10        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  196,98,125,24,21,184,121,2,0        // vbroadcastss  0x279b8(%rip),%ymm10        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   .byte  196,66,61,184,209                   // vfmadd231ps   %ymm9,%ymm8,%ymm10
   .byte  197,124,17,144,0,1,0,0              // vmovups       %ymm10,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -23232,14 +23213,14 @@
 FUNCTION(_sk_bicubic_p1x_hsw)
 _sk_bicubic_p1x_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,231,119,2,0         // vbroadcastss  0x277e7(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,5,56,120,2,0          // vbroadcastss  0x27838(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  197,188,88,0                        // vaddps        (%rax),%ymm8,%ymm0
   .byte  197,124,16,136,128,0,0,0            // vmovups       0x80(%rax),%ymm9
-  .byte  196,98,125,24,21,70,121,2,0         // vbroadcastss  0x27946(%rip),%ymm10        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
-  .byte  196,98,125,24,29,57,121,2,0         // vbroadcastss  0x27939(%rip),%ymm11        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  .byte  196,98,125,24,21,131,121,2,0        // vbroadcastss  0x27983(%rip),%ymm10        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  196,98,125,24,29,118,121,2,0        // vbroadcastss  0x27976(%rip),%ymm11        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   .byte  196,66,53,168,218                   // vfmadd213ps   %ymm10,%ymm9,%ymm11
   .byte  196,66,53,168,216                   // vfmadd213ps   %ymm8,%ymm9,%ymm11
-  .byte  196,98,125,24,5,46,121,2,0          // vbroadcastss  0x2792e(%rip),%ymm8        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  196,98,125,24,5,107,121,2,0         // vbroadcastss  0x2796b(%rip),%ymm8        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   .byte  196,66,53,184,195                   // vfmadd231ps   %ymm11,%ymm9,%ymm8
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -23250,12 +23231,12 @@
 FUNCTION(_sk_bicubic_p3x_hsw)
 _sk_bicubic_p3x_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,14,121,2,0         // vbroadcastss  0x2790e(%rip),%ymm0        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  196,226,125,24,5,75,121,2,0         // vbroadcastss  0x2794b(%rip),%ymm0        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,128,128,0,0,0            // vmovups       0x80(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,248,119,2,0        // vbroadcastss  0x277f8(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
-  .byte  196,98,125,24,29,227,120,2,0        // vbroadcastss  0x278e3(%rip),%ymm11        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  .byte  196,98,125,24,21,73,120,2,0         // vbroadcastss  0x27849(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  .byte  196,98,125,24,29,32,121,2,0         // vbroadcastss  0x27920(%rip),%ymm11        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   .byte  196,66,61,168,218                   // vfmadd213ps   %ymm10,%ymm8,%ymm11
   .byte  196,65,52,89,195                    // vmulps        %ymm11,%ymm9,%ymm8
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
@@ -23267,13 +23248,13 @@
 FUNCTION(_sk_bicubic_n3y_hsw)
 _sk_bicubic_n3y_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,190,120,2,0       // vbroadcastss  0x278be(%rip),%ymm1        # 3571c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
+  .byte  196,226,125,24,13,251,120,2,0       // vbroadcastss  0x278fb(%rip),%ymm1        # 35708 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
   .byte  197,244,88,72,64                    // vaddps        0x40(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,76,119,2,0          // vbroadcastss  0x2774c(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,5,157,119,2,0         // vbroadcastss  0x2779d(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,192,0,0,0             // vsubps        0xc0(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,170,119,2,0        // vbroadcastss  0x277aa(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
-  .byte  196,98,125,24,29,149,120,2,0        // vbroadcastss  0x27895(%rip),%ymm11        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  .byte  196,98,125,24,21,251,119,2,0        // vbroadcastss  0x277fb(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  .byte  196,98,125,24,29,210,120,2,0        // vbroadcastss  0x278d2(%rip),%ymm11        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   .byte  196,66,61,168,218                   // vfmadd213ps   %ymm10,%ymm8,%ymm11
   .byte  196,65,36,89,193                    // vmulps        %ymm9,%ymm11,%ymm8
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
@@ -23285,16 +23266,16 @@
 FUNCTION(_sk_bicubic_n1y_hsw)
 _sk_bicubic_n1y_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,108,120,2,0       // vbroadcastss  0x2786c(%rip),%ymm1        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  .byte  196,226,125,24,13,169,120,2,0       // vbroadcastss  0x278a9(%rip),%ymm1        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   .byte  197,244,88,72,64                    // vaddps        0x40(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,254,118,2,0         // vbroadcastss  0x276fe(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,5,79,119,2,0          // vbroadcastss  0x2774f(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,192,0,0,0             // vsubps        0xc0(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,93,120,2,0         // vbroadcastss  0x2785d(%rip),%ymm9        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
-  .byte  196,98,125,24,21,80,120,2,0         // vbroadcastss  0x27850(%rip),%ymm10        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  .byte  196,98,125,24,13,154,120,2,0        // vbroadcastss  0x2789a(%rip),%ymm9        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  196,98,125,24,21,141,120,2,0        // vbroadcastss  0x2788d(%rip),%ymm10        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   .byte  196,66,61,168,209                   // vfmadd213ps   %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,13,210,118,2,0        // vbroadcastss  0x276d2(%rip),%ymm9        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,13,35,119,2,0         // vbroadcastss  0x27723(%rip),%ymm9        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,66,61,184,202                   // vfmadd231ps   %ymm10,%ymm8,%ymm9
-  .byte  196,98,125,24,21,60,120,2,0         // vbroadcastss  0x2783c(%rip),%ymm10        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  196,98,125,24,21,121,120,2,0        // vbroadcastss  0x27879(%rip),%ymm10        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   .byte  196,66,61,184,209                   // vfmadd231ps   %ymm9,%ymm8,%ymm10
   .byte  197,124,17,144,64,1,0,0             // vmovups       %ymm10,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -23305,14 +23286,14 @@
 FUNCTION(_sk_bicubic_p1y_hsw)
 _sk_bicubic_p1y_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,168,118,2,0         // vbroadcastss  0x276a8(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,5,249,118,2,0         // vbroadcastss  0x276f9(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  197,188,88,72,64                    // vaddps        0x40(%rax),%ymm8,%ymm1
   .byte  197,124,16,136,192,0,0,0            // vmovups       0xc0(%rax),%ymm9
-  .byte  196,98,125,24,21,6,120,2,0          // vbroadcastss  0x27806(%rip),%ymm10        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
-  .byte  196,98,125,24,29,249,119,2,0        // vbroadcastss  0x277f9(%rip),%ymm11        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  .byte  196,98,125,24,21,67,120,2,0         // vbroadcastss  0x27843(%rip),%ymm10        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  196,98,125,24,29,54,120,2,0         // vbroadcastss  0x27836(%rip),%ymm11        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   .byte  196,66,53,168,218                   // vfmadd213ps   %ymm10,%ymm9,%ymm11
   .byte  196,66,53,168,216                   // vfmadd213ps   %ymm8,%ymm9,%ymm11
-  .byte  196,98,125,24,5,238,119,2,0         // vbroadcastss  0x277ee(%rip),%ymm8        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  196,98,125,24,5,43,120,2,0          // vbroadcastss  0x2782b(%rip),%ymm8        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   .byte  196,66,53,184,195                   // vfmadd231ps   %ymm11,%ymm9,%ymm8
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -23323,12 +23304,12 @@
 FUNCTION(_sk_bicubic_p3y_hsw)
 _sk_bicubic_p3y_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,206,119,2,0       // vbroadcastss  0x277ce(%rip),%ymm1        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  196,226,125,24,13,11,120,2,0        // vbroadcastss  0x2780b(%rip),%ymm1        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   .byte  197,244,88,72,64                    // vaddps        0x40(%rax),%ymm1,%ymm1
   .byte  197,124,16,128,192,0,0,0            // vmovups       0xc0(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,183,118,2,0        // vbroadcastss  0x276b7(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
-  .byte  196,98,125,24,29,162,119,2,0        // vbroadcastss  0x277a2(%rip),%ymm11        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  .byte  196,98,125,24,21,8,119,2,0          // vbroadcastss  0x27708(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  .byte  196,98,125,24,29,223,119,2,0        // vbroadcastss  0x277df(%rip),%ymm11        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   .byte  196,66,61,168,218                   // vfmadd213ps   %ymm10,%ymm8,%ymm11
   .byte  196,65,52,89,195                    // vmulps        %ymm11,%ymm9,%ymm8
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
@@ -23468,7 +23449,7 @@
   .byte  196,98,93,64,218                    // vpmulld       %ymm2,%ymm4,%ymm11
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  197,165,254,193                     // vpaddd        %ymm1,%ymm11,%ymm0
-  .byte  196,98,125,88,61,100,117,2,0        // vpbroadcastd  0x27564(%rip),%ymm15        # 35734 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  .byte  196,98,125,88,61,161,117,2,0        // vpbroadcastd  0x275a1(%rip),%ymm15        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
   .byte  196,194,125,64,199                  // vpmulld       %ymm15,%ymm0,%ymm0
   .byte  197,213,239,237                     // vpxor         %ymm5,%ymm5,%ymm5
   .byte  197,237,118,210                     // vpcmpeqd      %ymm2,%ymm2,%ymm2
@@ -23480,13 +23461,13 @@
   .byte  197,213,118,237                     // vpcmpeqd      %ymm5,%ymm5,%ymm5
   .byte  196,226,85,146,60,144               // vgatherdps    %ymm5,(%rax,%ymm2,4),%ymm7
   .byte  197,252,17,188,36,0,2,0,0           // vmovups       %ymm7,0x200(%rsp)
-  .byte  196,226,125,88,61,174,115,2,0       // vpbroadcastd  0x273ae(%rip),%ymm7        # 355c4 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
+  .byte  196,226,125,88,61,255,115,2,0       // vpbroadcastd  0x273ff(%rip),%ymm7        # 355c4 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
   .byte  197,253,254,199                     // vpaddd        %ymm7,%ymm0,%ymm0
   .byte  197,236,87,210                      // vxorps        %ymm2,%ymm2,%ymm2
   .byte  197,213,118,237                     // vpcmpeqd      %ymm5,%ymm5,%ymm5
   .byte  196,226,85,146,20,128               // vgatherdps    %ymm5,(%rax,%ymm0,4),%ymm2
   .byte  197,252,17,148,36,32,1,0,0          // vmovups       %ymm2,0x120(%rsp)
-  .byte  196,226,125,24,5,246,116,2,0        // vbroadcastss  0x274f6(%rip),%ymm0        # 35730 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  .byte  196,226,125,24,5,51,117,2,0         // vbroadcastss  0x27533(%rip),%ymm0        # 3571c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
   .byte  197,172,88,232                      // vaddps        %ymm0,%ymm10,%ymm5
   .byte  197,254,91,237                      // vcvttps2dq    %ymm5,%ymm5
   .byte  196,226,93,64,213                   // vpmulld       %ymm5,%ymm4,%ymm2
@@ -23748,7 +23729,7 @@
   .byte  197,254,127,132,36,0,1,0,0          // vmovdqu       %ymm0,0x100(%rsp)
   .byte  196,98,109,64,200                   // vpmulld       %ymm0,%ymm2,%ymm9
   .byte  197,181,254,199                     // vpaddd        %ymm7,%ymm9,%ymm0
-  .byte  196,98,125,88,21,143,111,2,0        // vpbroadcastd  0x26f8f(%rip),%ymm10        # 35734 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  .byte  196,98,125,88,21,204,111,2,0        // vpbroadcastd  0x26fcc(%rip),%ymm10        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
   .byte  196,194,125,64,194                  // vpmulld       %ymm10,%ymm0,%ymm0
   .byte  197,213,118,237                     // vpcmpeqd      %ymm5,%ymm5,%ymm5
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
@@ -23760,13 +23741,13 @@
   .byte  196,65,36,87,219                    // vxorps        %ymm11,%ymm11,%ymm11
   .byte  196,98,61,146,28,168                // vgatherdps    %ymm8,(%rax,%ymm5,4),%ymm11
   .byte  197,124,17,156,36,192,0,0,0         // vmovups       %ymm11,0xc0(%rsp)
-  .byte  196,98,125,88,29,214,109,2,0        // vpbroadcastd  0x26dd6(%rip),%ymm11        # 355c4 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
+  .byte  196,98,125,88,29,39,110,2,0         // vpbroadcastd  0x26e27(%rip),%ymm11        # 355c4 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
   .byte  196,193,125,254,195                 // vpaddd        %ymm11,%ymm0,%ymm0
   .byte  196,65,61,118,192                   // vpcmpeqd      %ymm8,%ymm8,%ymm8
   .byte  197,212,87,237                      // vxorps        %ymm5,%ymm5,%ymm5
   .byte  196,226,61,146,44,128               // vgatherdps    %ymm8,(%rax,%ymm0,4),%ymm5
   .byte  197,252,17,108,36,32                // vmovups       %ymm5,0x20(%rsp)
-  .byte  196,226,125,24,5,31,111,2,0         // vbroadcastss  0x26f1f(%rip),%ymm0        # 35730 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  .byte  196,226,125,24,5,92,111,2,0         // vbroadcastss  0x26f5c(%rip),%ymm0        # 3571c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
   .byte  197,116,88,192                      // vaddps        %ymm0,%ymm1,%ymm8
   .byte  196,65,126,91,192                   // vcvttps2dq    %ymm8,%ymm8
   .byte  196,194,109,64,232                  // vpmulld       %ymm8,%ymm2,%ymm5
@@ -24199,7 +24180,7 @@
   .byte  196,193,100,92,210                  // vsubps        %ymm10,%ymm3,%ymm2
   .byte  196,194,77,168,210                  // vfmadd213ps   %ymm10,%ymm6,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,249,99,2,0        // vbroadcastss  0x263f9(%rip),%ymm3        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,226,125,24,29,74,100,2,0        // vbroadcastss  0x2644a(%rip),%ymm3        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,252,16,164,36,96,6,0,0          // vmovups       0x660(%rsp),%ymm4
   .byte  197,252,16,172,36,128,6,0,0         // vmovups       0x680(%rsp),%ymm5
   .byte  197,252,16,180,36,160,6,0,0         // vmovups       0x6a0(%rsp),%ymm6
@@ -24207,6 +24188,25 @@
   .byte  72,129,196,248,6,0,0                // add           $0x6f8,%rsp
   .byte  255,224                             // jmpq          *%rax
 
+HIDDEN _sk_gauss_a_to_rgba_hsw
+.globl _sk_gauss_a_to_rgba_hsw
+FUNCTION(_sk_gauss_a_to_rgba_hsw)
+_sk_gauss_a_to_rgba_hsw:
+  .byte  196,226,125,24,5,132,101,2,0        // vbroadcastss  0x26584(%rip),%ymm0        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  196,226,125,24,13,119,101,2,0       // vbroadcastss  0x26577(%rip),%ymm1        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  .byte  196,226,101,168,200                 // vfmadd213ps   %ymm0,%ymm3,%ymm1
+  .byte  196,226,125,24,5,113,101,2,0        // vbroadcastss  0x26571(%rip),%ymm0        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  196,226,101,184,193                 // vfmadd231ps   %ymm1,%ymm3,%ymm0
+  .byte  196,226,125,24,13,103,101,2,0       // vbroadcastss  0x26567(%rip),%ymm1        # 35730 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  .byte  196,226,101,184,200                 // vfmadd231ps   %ymm0,%ymm3,%ymm1
+  .byte  196,226,125,24,5,93,101,2,0         // vbroadcastss  0x2655d(%rip),%ymm0        # 35734 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  .byte  196,226,101,184,193                 // vfmadd231ps   %ymm1,%ymm3,%ymm0
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
+  .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
+  .byte  197,252,40,216                      // vmovaps       %ymm0,%ymm3
+  .byte  255,224                             // jmpq          *%rax
+
 HIDDEN _sk_start_pipeline_avx
 .globl _sk_start_pipeline_avx
 FUNCTION(_sk_start_pipeline_avx)
@@ -30486,7 +30486,7 @@
   .byte  197,252,17,108,36,192               // vmovups       %ymm5,-0x40(%rsp)
   .byte  197,252,17,100,36,160               // vmovups       %ymm4,-0x60(%rsp)
   .byte  197,252,40,225                      // vmovaps       %ymm1,%ymm4
-  .byte  196,98,125,24,5,236,254,1,0         // vbroadcastss  0x1feec(%rip),%ymm8        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  196,98,125,24,5,216,254,1,0         // vbroadcastss  0x1fed8(%rip),%ymm8        # 35700 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   .byte  196,65,124,84,216                   // vandps        %ymm8,%ymm0,%ymm11
   .byte  196,98,125,24,21,18,255,1,0         // vbroadcastss  0x1ff12(%rip),%ymm10        # 35748 <_sk_srcover_bgra_8888_sse2_lowp+0x440>
   .byte  196,65,124,84,226                   // vandps        %ymm10,%ymm0,%ymm12
@@ -31712,29 +31712,6 @@
   .byte  93                                  // pop           %rbp
   .byte  255,224                             // jmpq          *%rax
 
-HIDDEN _sk_gauss_a_to_rgba_avx
-.globl _sk_gauss_a_to_rgba_avx
-FUNCTION(_sk_gauss_a_to_rgba_avx)
-_sk_gauss_a_to_rgba_avx:
-  .byte  196,226,125,24,5,1,234,1,0          // vbroadcastss  0x1ea01(%rip),%ymm0        # 356e4 <_sk_srcover_bgra_8888_sse2_lowp+0x3dc>
-  .byte  197,228,89,192                      // vmulps        %ymm0,%ymm3,%ymm0
-  .byte  196,226,125,24,13,248,233,1,0       // vbroadcastss  0x1e9f8(%rip),%ymm1        # 356e8 <_sk_srcover_bgra_8888_sse2_lowp+0x3e0>
-  .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
-  .byte  197,252,89,195                      // vmulps        %ymm3,%ymm0,%ymm0
-  .byte  196,226,125,24,13,235,233,1,0       // vbroadcastss  0x1e9eb(%rip),%ymm1        # 356ec <_sk_srcover_bgra_8888_sse2_lowp+0x3e4>
-  .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
-  .byte  197,252,89,195                      // vmulps        %ymm3,%ymm0,%ymm0
-  .byte  196,226,125,24,13,222,233,1,0       // vbroadcastss  0x1e9de(%rip),%ymm1        # 356f0 <_sk_srcover_bgra_8888_sse2_lowp+0x3e8>
-  .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
-  .byte  197,252,89,195                      // vmulps        %ymm3,%ymm0,%ymm0
-  .byte  196,226,125,24,13,209,233,1,0       // vbroadcastss  0x1e9d1(%rip),%ymm1        # 356f4 <_sk_srcover_bgra_8888_sse2_lowp+0x3ec>
-  .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
-  .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
-  .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
-  .byte  197,252,40,216                      // vmovaps       %ymm0,%ymm3
-  .byte  255,224                             // jmpq          *%rax
-
 HIDDEN _sk_gradient_avx
 .globl _sk_gradient_avx
 FUNCTION(_sk_gradient_avx)
@@ -31749,12 +31726,12 @@
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
   .byte  73,131,248,2                        // cmp           $0x2,%r8
-  .byte  114,81                              // jb            16da6 <_sk_gradient_avx+0x6f>
+  .byte  114,81                              // jb            16d49 <_sk_gradient_avx+0x6f>
   .byte  72,139,88,72                        // mov           0x48(%rax),%rbx
   .byte  73,255,200                          // dec           %r8
   .byte  72,131,195,4                        // add           $0x4,%rbx
   .byte  196,65,52,87,201                    // vxorps        %ymm9,%ymm9,%ymm9
-  .byte  196,98,125,24,21,78,232,1,0         // vbroadcastss  0x1e84e(%rip),%ymm10        # 355bc <_sk_srcover_bgra_8888_sse2_lowp+0x2b4>
+  .byte  196,98,125,24,21,171,232,1,0        // vbroadcastss  0x1e8ab(%rip),%ymm10        # 355bc <_sk_srcover_bgra_8888_sse2_lowp+0x2b4>
   .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
   .byte  196,98,125,24,3                     // vbroadcastss  (%rbx),%ymm8
   .byte  197,60,194,192,2                    // vcmpleps      %ymm0,%ymm8,%ymm8
@@ -31766,7 +31743,7 @@
   .byte  196,227,117,24,202,1                // vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   .byte  72,131,195,4                        // add           $0x4,%rbx
   .byte  73,255,200                          // dec           %r8
-  .byte  117,204                             // jne           16d72 <_sk_gradient_avx+0x3b>
+  .byte  117,204                             // jne           16d15 <_sk_gradient_avx+0x3b>
   .byte  196,195,249,22,200,1                // vpextrq       $0x1,%xmm1,%r8
   .byte  69,137,193                          // mov           %r8d,%r9d
   .byte  73,193,232,32                       // shr           $0x20,%r8
@@ -31948,27 +31925,27 @@
   .byte  196,65,52,95,226                    // vmaxps        %ymm10,%ymm9,%ymm12
   .byte  196,65,36,94,220                    // vdivps        %ymm12,%ymm11,%ymm11
   .byte  196,65,36,89,227                    // vmulps        %ymm11,%ymm11,%ymm12
-  .byte  196,98,125,24,45,124,229,1,0        // vbroadcastss  0x1e57c(%rip),%ymm13        # 356f8 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
+  .byte  196,98,125,24,45,197,229,1,0        // vbroadcastss  0x1e5c5(%rip),%ymm13        # 356e4 <_sk_srcover_bgra_8888_sse2_lowp+0x3dc>
   .byte  196,65,28,89,237                    // vmulps        %ymm13,%ymm12,%ymm13
-  .byte  196,98,125,24,53,114,229,1,0        // vbroadcastss  0x1e572(%rip),%ymm14        # 356fc <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
+  .byte  196,98,125,24,53,187,229,1,0        // vbroadcastss  0x1e5bb(%rip),%ymm14        # 356e8 <_sk_srcover_bgra_8888_sse2_lowp+0x3e0>
   .byte  196,65,20,88,238                    // vaddps        %ymm14,%ymm13,%ymm13
   .byte  196,65,28,89,237                    // vmulps        %ymm13,%ymm12,%ymm13
-  .byte  196,98,125,24,53,99,229,1,0         // vbroadcastss  0x1e563(%rip),%ymm14        # 35700 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
+  .byte  196,98,125,24,53,172,229,1,0        // vbroadcastss  0x1e5ac(%rip),%ymm14        # 356ec <_sk_srcover_bgra_8888_sse2_lowp+0x3e4>
   .byte  196,65,20,88,238                    // vaddps        %ymm14,%ymm13,%ymm13
   .byte  196,65,28,89,229                    // vmulps        %ymm13,%ymm12,%ymm12
-  .byte  196,98,125,24,45,84,229,1,0         // vbroadcastss  0x1e554(%rip),%ymm13        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
+  .byte  196,98,125,24,45,157,229,1,0        // vbroadcastss  0x1e59d(%rip),%ymm13        # 356f0 <_sk_srcover_bgra_8888_sse2_lowp+0x3e8>
   .byte  196,65,28,88,229                    // vaddps        %ymm13,%ymm12,%ymm12
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
   .byte  196,65,52,194,202,1                 // vcmpltps      %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,21,63,229,1,0         // vbroadcastss  0x1e53f(%rip),%ymm10        # 35708 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
+  .byte  196,98,125,24,21,136,229,1,0        // vbroadcastss  0x1e588(%rip),%ymm10        # 356f4 <_sk_srcover_bgra_8888_sse2_lowp+0x3ec>
   .byte  196,65,44,92,211                    // vsubps        %ymm11,%ymm10,%ymm10
   .byte  196,67,37,74,202,144                // vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   .byte  196,193,124,194,192,1               // vcmpltps      %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,21,209,227,1,0        // vbroadcastss  0x1e3d1(%rip),%ymm10        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,21,46,228,1,0         // vbroadcastss  0x1e42e(%rip),%ymm10        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,65,44,92,209                    // vsubps        %ymm9,%ymm10,%ymm10
   .byte  196,195,53,74,194,0                 // vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   .byte  196,65,116,194,200,1                // vcmpltps      %ymm8,%ymm1,%ymm9
-  .byte  196,98,125,24,21,187,227,1,0        // vbroadcastss  0x1e3bb(%rip),%ymm10        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,21,24,228,1,0         // vbroadcastss  0x1e418(%rip),%ymm10        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,44,92,208                       // vsubps        %ymm0,%ymm10,%ymm10
   .byte  196,195,125,74,194,144              // vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   .byte  196,65,124,194,200,3                // vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -31998,7 +31975,7 @@
   .byte  196,67,121,4,210,0                  // vpermilps     $0x0,%xmm10,%xmm10
   .byte  196,67,45,24,210,1                  // vinsertf128   $0x1,%xmm10,%ymm10,%ymm10
   .byte  197,44,88,208                       // vaddps        %ymm0,%ymm10,%ymm10
-  .byte  196,98,125,24,29,181,228,1,0        // vbroadcastss  0x1e4b5(%rip),%ymm11        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  .byte  196,98,125,24,29,254,228,1,0        // vbroadcastss  0x1e4fe(%rip),%ymm11        # 356f8 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   .byte  196,65,44,89,211                    // vmulps        %ymm11,%ymm10,%ymm10
   .byte  197,252,89,192                      // vmulps        %ymm0,%ymm0,%ymm0
   .byte  197,116,89,217                      // vmulps        %ymm1,%ymm1,%ymm11
@@ -32007,17 +31984,17 @@
   .byte  196,227,121,4,192,0                 // vpermilps     $0x0,%xmm0,%xmm0
   .byte  196,227,125,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
   .byte  197,164,92,192                      // vsubps        %ymm0,%ymm11,%ymm0
-  .byte  196,98,125,24,13,137,228,1,0        // vbroadcastss  0x1e489(%rip),%ymm9        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  .byte  196,98,125,24,13,210,228,1,0        // vbroadcastss  0x1e4d2(%rip),%ymm9        # 356fc <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  196,65,44,89,194                    // vmulps        %ymm10,%ymm10,%ymm8
   .byte  196,193,124,88,192                  // vaddps        %ymm8,%ymm0,%ymm0
   .byte  197,252,81,192                      // vsqrtps       %ymm0,%ymm0
   .byte  196,98,125,24,64,68                 // vbroadcastss  0x44(%rax),%ymm8
-  .byte  196,98,125,24,13,103,228,1,0        // vbroadcastss  0x1e467(%rip),%ymm9        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  196,98,125,24,13,176,228,1,0        // vbroadcastss  0x1e4b0(%rip),%ymm9        # 35700 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   .byte  196,65,44,87,201                    // vxorps        %ymm9,%ymm10,%ymm9
   .byte  196,65,124,92,210                   // vsubps        %ymm10,%ymm0,%ymm10
-  .byte  196,98,125,24,29,244,226,1,0        // vbroadcastss  0x1e2f4(%rip),%ymm11        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,29,81,227,1,0         // vbroadcastss  0x1e351(%rip),%ymm11        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,65,60,89,195                    // vmulps        %ymm11,%ymm8,%ymm8
   .byte  196,65,60,89,210                    // vmulps        %ymm10,%ymm8,%ymm10
   .byte  197,180,92,192                      // vsubps        %ymm0,%ymm9,%ymm0
@@ -32037,7 +32014,7 @@
   .byte  196,67,121,4,210,0                  // vpermilps     $0x0,%xmm10,%xmm10
   .byte  196,67,45,24,210,1                  // vinsertf128   $0x1,%xmm10,%ymm10,%ymm10
   .byte  197,44,88,208                       // vaddps        %ymm0,%ymm10,%ymm10
-  .byte  196,98,125,24,29,7,228,1,0          // vbroadcastss  0x1e407(%rip),%ymm11        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  .byte  196,98,125,24,29,80,228,1,0         // vbroadcastss  0x1e450(%rip),%ymm11        # 356f8 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   .byte  196,65,44,89,211                    // vmulps        %ymm11,%ymm10,%ymm10
   .byte  197,252,89,192                      // vmulps        %ymm0,%ymm0,%ymm0
   .byte  197,116,89,217                      // vmulps        %ymm1,%ymm1,%ymm11
@@ -32046,17 +32023,17 @@
   .byte  196,227,121,4,192,0                 // vpermilps     $0x0,%xmm0,%xmm0
   .byte  196,227,125,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
   .byte  197,164,92,192                      // vsubps        %ymm0,%ymm11,%ymm0
-  .byte  196,98,125,24,13,219,227,1,0        // vbroadcastss  0x1e3db(%rip),%ymm9        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  .byte  196,98,125,24,13,36,228,1,0         // vbroadcastss  0x1e424(%rip),%ymm9        # 356fc <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  196,65,44,89,194                    // vmulps        %ymm10,%ymm10,%ymm8
   .byte  196,193,124,88,192                  // vaddps        %ymm8,%ymm0,%ymm0
   .byte  197,252,81,192                      // vsqrtps       %ymm0,%ymm0
   .byte  196,98,125,24,64,68                 // vbroadcastss  0x44(%rax),%ymm8
-  .byte  196,98,125,24,13,185,227,1,0        // vbroadcastss  0x1e3b9(%rip),%ymm9        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  196,98,125,24,13,2,228,1,0          // vbroadcastss  0x1e402(%rip),%ymm9        # 35700 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   .byte  196,65,44,87,201                    // vxorps        %ymm9,%ymm10,%ymm9
   .byte  196,65,124,92,210                   // vsubps        %ymm10,%ymm0,%ymm10
-  .byte  196,98,125,24,29,70,226,1,0         // vbroadcastss  0x1e246(%rip),%ymm11        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,29,163,226,1,0        // vbroadcastss  0x1e2a3(%rip),%ymm11        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,65,60,89,195                    // vmulps        %ymm11,%ymm8,%ymm8
   .byte  196,65,60,89,210                    // vmulps        %ymm10,%ymm8,%ymm10
   .byte  197,180,92,192                      // vsubps        %ymm0,%ymm9,%ymm0
@@ -32075,7 +32052,7 @@
   .byte  196,67,121,4,201,0                  // vpermilps     $0x0,%xmm9,%xmm9
   .byte  196,67,53,24,201,1                  // vinsertf128   $0x1,%xmm9,%ymm9,%ymm9
   .byte  197,52,88,200                       // vaddps        %ymm0,%ymm9,%ymm9
-  .byte  196,98,125,24,21,95,227,1,0         // vbroadcastss  0x1e35f(%rip),%ymm10        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  .byte  196,98,125,24,21,168,227,1,0        // vbroadcastss  0x1e3a8(%rip),%ymm10        # 356f8 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   .byte  196,65,52,89,202                    // vmulps        %ymm10,%ymm9,%ymm9
   .byte  197,252,89,192                      // vmulps        %ymm0,%ymm0,%ymm0
   .byte  197,116,89,209                      // vmulps        %ymm1,%ymm1,%ymm10
@@ -32084,7 +32061,7 @@
   .byte  196,227,121,4,192,0                 // vpermilps     $0x0,%xmm0,%xmm0
   .byte  196,227,125,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
   .byte  197,172,92,192                      // vsubps        %ymm0,%ymm10,%ymm0
-  .byte  196,98,125,24,5,55,227,1,0          // vbroadcastss  0x1e337(%rip),%ymm8        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  .byte  196,98,125,24,5,128,227,1,0         // vbroadcastss  0x1e380(%rip),%ymm8        # 35700 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   .byte  196,193,124,87,192                  // vxorps        %ymm8,%ymm0,%ymm0
   .byte  196,193,124,94,193                  // vdivps        %ymm9,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -32128,7 +32105,7 @@
 FUNCTION(_sk_save_xy_avx)
 _sk_save_xy_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,93,225,1,0          // vbroadcastss  0x1e15d(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,5,186,225,1,0         // vbroadcastss  0x1e1ba(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,65,124,88,200                   // vaddps        %ymm8,%ymm0,%ymm9
   .byte  196,67,125,8,209,1                  // vroundps      $0x1,%ymm9,%ymm10
   .byte  196,65,52,92,202                    // vsubps        %ymm10,%ymm9,%ymm9
@@ -32165,9 +32142,9 @@
 FUNCTION(_sk_bilinear_nx_avx)
 _sk_bilinear_nx_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,67,226,1,0         // vbroadcastss  0x1e243(%rip),%ymm0        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  .byte  196,226,125,24,5,140,226,1,0        // vbroadcastss  0x1e28c(%rip),%ymm0        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,214,224,1,0         // vbroadcastss  0x1e0d6(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,5,51,225,1,0          // vbroadcastss  0x1e133(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,128,0,0,0             // vsubps        0x80(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -32178,7 +32155,7 @@
 FUNCTION(_sk_bilinear_px_avx)
 _sk_bilinear_px_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,179,224,1,0        // vbroadcastss  0x1e0b3(%rip),%ymm0        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,226,125,24,5,16,225,1,0         // vbroadcastss  0x1e110(%rip),%ymm0        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,128,128,0,0,0            // vmovups       0x80(%rax),%ymm8
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
@@ -32190,9 +32167,9 @@
 FUNCTION(_sk_bilinear_ny_avx)
 _sk_bilinear_ny_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,244,225,1,0       // vbroadcastss  0x1e1f4(%rip),%ymm1        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  .byte  196,226,125,24,13,61,226,1,0        // vbroadcastss  0x1e23d(%rip),%ymm1        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   .byte  197,244,88,72,64                    // vaddps        0x40(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,134,224,1,0         // vbroadcastss  0x1e086(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,5,227,224,1,0         // vbroadcastss  0x1e0e3(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,192,0,0,0             // vsubps        0xc0(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -32203,7 +32180,7 @@
 FUNCTION(_sk_bilinear_py_avx)
 _sk_bilinear_py_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,99,224,1,0        // vbroadcastss  0x1e063(%rip),%ymm1        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,226,125,24,13,192,224,1,0       // vbroadcastss  0x1e0c0(%rip),%ymm1        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  197,244,88,72,64                    // vaddps        0x40(%rax),%ymm1,%ymm1
   .byte  197,124,16,128,192,0,0,0            // vmovups       0xc0(%rax),%ymm8
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
@@ -32215,14 +32192,14 @@
 FUNCTION(_sk_bicubic_n3x_avx)
 _sk_bicubic_n3x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,167,225,1,0        // vbroadcastss  0x1e1a7(%rip),%ymm0        # 3571c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
+  .byte  196,226,125,24,5,240,225,1,0        // vbroadcastss  0x1e1f0(%rip),%ymm0        # 35708 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,54,224,1,0          // vbroadcastss  0x1e036(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,5,147,224,1,0         // vbroadcastss  0x1e093(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,128,0,0,0             // vsubps        0x80(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,136,225,1,0        // vbroadcastss  0x1e188(%rip),%ymm10        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  .byte  196,98,125,24,21,209,225,1,0        // vbroadcastss  0x1e1d1(%rip),%ymm10        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,134,224,1,0        // vbroadcastss  0x1e086(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  .byte  196,98,125,24,21,227,224,1,0        // vbroadcastss  0x1e0e3(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
@@ -32234,19 +32211,19 @@
 FUNCTION(_sk_bicubic_n1x_avx)
 _sk_bicubic_n1x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,81,225,1,0         // vbroadcastss  0x1e151(%rip),%ymm0        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  .byte  196,226,125,24,5,154,225,1,0        // vbroadcastss  0x1e19a(%rip),%ymm0        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,228,223,1,0         // vbroadcastss  0x1dfe4(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,5,65,224,1,0          // vbroadcastss  0x1e041(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,128,0,0,0             // vsubps        0x80(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,63,225,1,0         // vbroadcastss  0x1e13f(%rip),%ymm9        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  .byte  196,98,125,24,13,136,225,1,0        // vbroadcastss  0x1e188(%rip),%ymm9        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,53,225,1,0         // vbroadcastss  0x1e135(%rip),%ymm10        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  196,98,125,24,21,126,225,1,0        // vbroadcastss  0x1e17e(%rip),%ymm10        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,174,223,1,0        // vbroadcastss  0x1dfae(%rip),%ymm10        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,21,11,224,1,0         // vbroadcastss  0x1e00b(%rip),%ymm10        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,19,225,1,0         // vbroadcastss  0x1e113(%rip),%ymm9        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  196,98,125,24,13,92,225,1,0         // vbroadcastss  0x1e15c(%rip),%ymm9        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -32257,17 +32234,17 @@
 FUNCTION(_sk_bicubic_p1x_avx)
 _sk_bicubic_p1x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,127,223,1,0         // vbroadcastss  0x1df7f(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,5,220,223,1,0         // vbroadcastss  0x1dfdc(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  197,188,88,0                        // vaddps        (%rax),%ymm8,%ymm0
   .byte  197,124,16,136,128,0,0,0            // vmovups       0x80(%rax),%ymm9
-  .byte  196,98,125,24,21,218,224,1,0        // vbroadcastss  0x1e0da(%rip),%ymm10        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  .byte  196,98,125,24,21,35,225,1,0         // vbroadcastss  0x1e123(%rip),%ymm10        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
-  .byte  196,98,125,24,29,208,224,1,0        // vbroadcastss  0x1e0d0(%rip),%ymm11        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  196,98,125,24,29,25,225,1,0         // vbroadcastss  0x1e119(%rip),%ymm11        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
   .byte  196,65,44,88,192                    // vaddps        %ymm8,%ymm10,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
-  .byte  196,98,125,24,13,183,224,1,0        // vbroadcastss  0x1e0b7(%rip),%ymm9        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  196,98,125,24,13,0,225,1,0          // vbroadcastss  0x1e100(%rip),%ymm9        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -32278,13 +32255,13 @@
 FUNCTION(_sk_bicubic_p3x_avx)
 _sk_bicubic_p3x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,151,224,1,0        // vbroadcastss  0x1e097(%rip),%ymm0        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  196,226,125,24,5,224,224,1,0        // vbroadcastss  0x1e0e0(%rip),%ymm0        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,128,128,0,0,0            // vmovups       0x80(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,117,224,1,0        // vbroadcastss  0x1e075(%rip),%ymm10        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  .byte  196,98,125,24,21,190,224,1,0        // vbroadcastss  0x1e0be(%rip),%ymm10        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,115,223,1,0        // vbroadcastss  0x1df73(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  .byte  196,98,125,24,21,208,223,1,0        // vbroadcastss  0x1dfd0(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,0,1,0,0              // vmovups       %ymm8,0x100(%rax)
@@ -32296,14 +32273,14 @@
 FUNCTION(_sk_bicubic_n3y_avx)
 _sk_bicubic_n3y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,66,224,1,0        // vbroadcastss  0x1e042(%rip),%ymm1        # 3571c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
+  .byte  196,226,125,24,13,139,224,1,0       // vbroadcastss  0x1e08b(%rip),%ymm1        # 35708 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
   .byte  197,244,88,72,64                    // vaddps        0x40(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,208,222,1,0         // vbroadcastss  0x1ded0(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,5,45,223,1,0          // vbroadcastss  0x1df2d(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,192,0,0,0             // vsubps        0xc0(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,34,224,1,0         // vbroadcastss  0x1e022(%rip),%ymm10        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  .byte  196,98,125,24,21,107,224,1,0        // vbroadcastss  0x1e06b(%rip),%ymm10        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,32,223,1,0         // vbroadcastss  0x1df20(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  .byte  196,98,125,24,21,125,223,1,0        // vbroadcastss  0x1df7d(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
@@ -32315,19 +32292,19 @@
 FUNCTION(_sk_bicubic_n1y_avx)
 _sk_bicubic_n1y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,235,223,1,0       // vbroadcastss  0x1dfeb(%rip),%ymm1        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  .byte  196,226,125,24,13,52,224,1,0        // vbroadcastss  0x1e034(%rip),%ymm1        # 35704 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   .byte  197,244,88,72,64                    // vaddps        0x40(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,125,222,1,0         // vbroadcastss  0x1de7d(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,98,125,24,5,218,222,1,0         // vbroadcastss  0x1deda(%rip),%ymm8        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,60,92,128,192,0,0,0             // vsubps        0xc0(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,216,223,1,0        // vbroadcastss  0x1dfd8(%rip),%ymm9        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  .byte  196,98,125,24,13,33,224,1,0         // vbroadcastss  0x1e021(%rip),%ymm9        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,206,223,1,0        // vbroadcastss  0x1dfce(%rip),%ymm10        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  196,98,125,24,21,23,224,1,0         // vbroadcastss  0x1e017(%rip),%ymm10        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,71,222,1,0         // vbroadcastss  0x1de47(%rip),%ymm10        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,21,164,222,1,0        // vbroadcastss  0x1dea4(%rip),%ymm10        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,172,223,1,0        // vbroadcastss  0x1dfac(%rip),%ymm9        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  196,98,125,24,13,245,223,1,0        // vbroadcastss  0x1dff5(%rip),%ymm9        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -32338,17 +32315,17 @@
 FUNCTION(_sk_bicubic_p1y_avx)
 _sk_bicubic_p1y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,24,222,1,0          // vbroadcastss  0x1de18(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  .byte  196,98,125,24,5,117,222,1,0         // vbroadcastss  0x1de75(%rip),%ymm8        # 355b4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   .byte  197,188,88,72,64                    // vaddps        0x40(%rax),%ymm8,%ymm1
   .byte  197,124,16,136,192,0,0,0            // vmovups       0xc0(%rax),%ymm9
-  .byte  196,98,125,24,21,114,223,1,0        // vbroadcastss  0x1df72(%rip),%ymm10        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  .byte  196,98,125,24,21,187,223,1,0        // vbroadcastss  0x1dfbb(%rip),%ymm10        # 35710 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
-  .byte  196,98,125,24,29,104,223,1,0        // vbroadcastss  0x1df68(%rip),%ymm11        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  196,98,125,24,29,177,223,1,0        // vbroadcastss  0x1dfb1(%rip),%ymm11        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
   .byte  196,65,44,88,192                    // vaddps        %ymm8,%ymm10,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
-  .byte  196,98,125,24,13,79,223,1,0         // vbroadcastss  0x1df4f(%rip),%ymm9        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  196,98,125,24,13,152,223,1,0        // vbroadcastss  0x1df98(%rip),%ymm9        # 35718 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -32359,13 +32336,13 @@
 FUNCTION(_sk_bicubic_p3y_avx)
 _sk_bicubic_p3y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,47,223,1,0        // vbroadcastss  0x1df2f(%rip),%ymm1        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  196,226,125,24,13,120,223,1,0       // vbroadcastss  0x1df78(%rip),%ymm1        # 35714 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   .byte  197,244,88,72,64                    // vaddps        0x40(%rax),%ymm1,%ymm1
   .byte  197,124,16,128,192,0,0,0            // vmovups       0xc0(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,12,223,1,0         // vbroadcastss  0x1df0c(%rip),%ymm10        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  .byte  196,98,125,24,21,85,223,1,0         // vbroadcastss  0x1df55(%rip),%ymm10        # 3570c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,10,222,1,0         // vbroadcastss  0x1de0a(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  .byte  196,98,125,24,21,103,222,1,0        // vbroadcastss  0x1de67(%rip),%ymm10        # 3562c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,64,1,0,0             // vmovups       %ymm8,0x140(%rax)
@@ -32513,7 +32490,7 @@
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  197,217,254,207                     // vpaddd        %xmm7,%xmm4,%xmm1
   .byte  197,249,254,193                     // vpaddd        %xmm1,%xmm0,%xmm0
-  .byte  196,226,121,24,37,164,220,1,0       // vbroadcastss  0x1dca4(%rip),%xmm4        # 35734 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  .byte  196,226,121,24,37,237,220,1,0       // vbroadcastss  0x1dced(%rip),%xmm4        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
   .byte  196,98,121,64,228                   // vpmulld       %xmm4,%xmm0,%xmm12
   .byte  196,226,49,64,198                   // vpmulld       %xmm6,%xmm9,%xmm0
   .byte  197,249,127,132,36,128,0,0,0        // vmovdqa       %xmm0,0x80(%rsp)
@@ -32567,7 +32544,7 @@
   .byte  196,161,122,16,60,128               // vmovss        (%rax,%r8,4),%xmm7
   .byte  196,227,73,33,247,48                // vinsertps     $0x30,%xmm7,%xmm6,%xmm6
   .byte  196,227,77,24,237,1                 // vinsertf128   $0x1,%xmm5,%ymm6,%ymm5
-  .byte  196,98,121,24,21,9,218,1,0          // vbroadcastss  0x1da09(%rip),%xmm10        # 355c4 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
+  .byte  196,98,121,24,21,102,218,1,0        // vbroadcastss  0x1da66(%rip),%xmm10        # 355c4 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
   .byte  196,193,105,254,210                 // vpaddd        %xmm10,%xmm2,%xmm2
   .byte  196,195,249,22,208,1                // vpextrq       $0x1,%xmm2,%r8
   .byte  196,193,249,126,209                 // vmovq         %xmm2,%r9
@@ -32595,7 +32572,7 @@
   .byte  196,161,122,16,60,128               // vmovss        (%rax,%r8,4),%xmm7
   .byte  196,227,73,33,247,48                // vinsertps     $0x30,%xmm7,%xmm6,%xmm6
   .byte  196,227,77,24,210,1                 // vinsertf128   $0x1,%xmm2,%ymm6,%ymm2
-  .byte  196,226,125,24,61,225,218,1,0       // vbroadcastss  0x1dae1(%rip),%ymm7        # 35730 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  .byte  196,226,125,24,61,42,219,1,0        // vbroadcastss  0x1db2a(%rip),%ymm7        # 3571c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
   .byte  197,148,88,247                      // vaddps        %ymm7,%ymm13,%ymm6
   .byte  197,124,40,231                      // vmovaps       %ymm7,%ymm12
   .byte  197,124,17,36,36                    // vmovups       %ymm12,(%rsp)
@@ -33338,7 +33315,7 @@
   .byte  197,249,127,68,36,32                // vmovdqa       %xmm0,0x20(%rsp)
   .byte  197,225,254,216                     // vpaddd        %xmm0,%xmm3,%xmm3
   .byte  197,233,254,195                     // vpaddd        %xmm3,%xmm2,%xmm0
-  .byte  196,98,121,24,13,165,203,1,0        // vbroadcastss  0x1cba5(%rip),%xmm9        # 35734 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  .byte  196,98,121,24,13,238,203,1,0        // vbroadcastss  0x1cbee(%rip),%xmm9        # 35720 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
   .byte  196,194,121,64,209                  // vpmulld       %xmm9,%xmm0,%xmm2
   .byte  196,226,81,64,199                   // vpmulld       %xmm7,%xmm5,%xmm0
   .byte  197,249,127,132,36,96,1,0,0         // vmovdqa       %xmm0,0x160(%rsp)
@@ -33395,7 +33372,7 @@
   .byte  196,161,122,16,60,128               // vmovss        (%rax,%r8,4),%xmm7
   .byte  196,227,73,33,247,48                // vinsertps     $0x30,%xmm7,%xmm6,%xmm6
   .byte  196,227,77,24,237,1                 // vinsertf128   $0x1,%xmm5,%ymm6,%ymm5
-  .byte  196,98,121,24,21,250,200,1,0        // vbroadcastss  0x1c8fa(%rip),%xmm10        # 355c4 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
+  .byte  196,98,121,24,21,87,201,1,0         // vbroadcastss  0x1c957(%rip),%xmm10        # 355c4 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
   .byte  196,193,121,254,194                 // vpaddd        %xmm10,%xmm0,%xmm0
   .byte  196,195,249,22,192,1                // vpextrq       $0x1,%xmm0,%r8
   .byte  196,193,249,126,193                 // vmovq         %xmm0,%r9
@@ -33423,7 +33400,7 @@
   .byte  196,161,122,16,52,128               // vmovss        (%rax,%r8,4),%xmm6
   .byte  196,227,105,33,214,48               // vinsertps     $0x30,%xmm6,%xmm2,%xmm2
   .byte  196,227,109,24,208,1                // vinsertf128   $0x1,%xmm0,%ymm2,%ymm2
-  .byte  196,98,125,24,37,210,201,1,0        // vbroadcastss  0x1c9d2(%rip),%ymm12        # 35730 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  .byte  196,98,125,24,37,27,202,1,0         // vbroadcastss  0x1ca1b(%rip),%ymm12        # 3571c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
   .byte  196,193,4,88,196                    // vaddps        %ymm12,%ymm15,%ymm0
   .byte  197,124,17,36,36                    // vmovups       %ymm12,(%rsp)
   .byte  197,254,91,192                      // vcvttps2dq    %ymm0,%ymm0
@@ -34821,7 +34798,7 @@
   .byte  197,228,89,210                      // vmulps        %ymm2,%ymm3,%ymm2
   .byte  197,220,88,210                      // vaddps        %ymm2,%ymm4,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,27,171,1,0        // vbroadcastss  0x1ab1b(%rip),%ymm3        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  .byte  196,226,125,24,29,120,171,1,0       // vbroadcastss  0x1ab78(%rip),%ymm3        # 355b8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   .byte  197,252,16,164,36,64,2,0,0          // vmovups       0x240(%rsp),%ymm4
   .byte  197,252,16,172,36,96,2,0,0          // vmovups       0x260(%rsp),%ymm5
   .byte  197,252,16,180,36,128,2,0,0         // vmovups       0x280(%rsp),%ymm6
@@ -34833,6 +34810,29 @@
   .byte  65,95                               // pop           %r15
   .byte  255,224                             // jmpq          *%rax
 
+HIDDEN _sk_gauss_a_to_rgba_avx
+.globl _sk_gauss_a_to_rgba_avx
+FUNCTION(_sk_gauss_a_to_rgba_avx)
+_sk_gauss_a_to_rgba_avx:
+  .byte  196,226,125,24,5,167,172,1,0        // vbroadcastss  0x1aca7(%rip),%ymm0        # 35724 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  .byte  197,228,89,192                      // vmulps        %ymm0,%ymm3,%ymm0
+  .byte  196,226,125,24,13,158,172,1,0       // vbroadcastss  0x1ac9e(%rip),%ymm1        # 35728 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
+  .byte  197,252,89,195                      // vmulps        %ymm3,%ymm0,%ymm0
+  .byte  196,226,125,24,13,145,172,1,0       // vbroadcastss  0x1ac91(%rip),%ymm1        # 3572c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
+  .byte  197,252,89,195                      // vmulps        %ymm3,%ymm0,%ymm0
+  .byte  196,226,125,24,13,132,172,1,0       // vbroadcastss  0x1ac84(%rip),%ymm1        # 35730 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
+  .byte  197,252,89,195                      // vmulps        %ymm3,%ymm0,%ymm0
+  .byte  196,226,125,24,13,119,172,1,0       // vbroadcastss  0x1ac77(%rip),%ymm1        # 35734 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
+  .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
+  .byte  197,252,40,216                      // vmovaps       %ymm0,%ymm3
+  .byte  255,224                             // jmpq          *%rax
+
 HIDDEN _sk_start_pipeline_sse41
 .globl _sk_start_pipeline_sse41
 FUNCTION(_sk_start_pipeline_sse41)
@@ -40824,25 +40824,6 @@
   .byte  65,94                               // pop           %r14
   .byte  255,224                             // jmpq          *%rax
 
-HIDDEN _sk_gauss_a_to_rgba_sse41
-.globl _sk_gauss_a_to_rgba_sse41
-FUNCTION(_sk_gauss_a_to_rgba_sse41)
-_sk_gauss_a_to_rgba_sse41:
-  .byte  15,40,5,0,95,1,0                    // movaps        0x15f00(%rip),%xmm0        # 363e0 <_sk_srcover_bgra_8888_sse2_lowp+0x10d8>
-  .byte  15,89,195                           // mulps         %xmm3,%xmm0
-  .byte  15,88,5,6,95,1,0                    // addps         0x15f06(%rip),%xmm0        # 363f0 <_sk_srcover_bgra_8888_sse2_lowp+0x10e8>
-  .byte  15,89,195                           // mulps         %xmm3,%xmm0
-  .byte  15,88,5,12,95,1,0                   // addps         0x15f0c(%rip),%xmm0        # 36400 <_sk_srcover_bgra_8888_sse2_lowp+0x10f8>
-  .byte  15,89,195                           // mulps         %xmm3,%xmm0
-  .byte  15,88,5,18,95,1,0                   // addps         0x15f12(%rip),%xmm0        # 36410 <_sk_srcover_bgra_8888_sse2_lowp+0x1108>
-  .byte  15,89,195                           // mulps         %xmm3,%xmm0
-  .byte  15,88,5,24,95,1,0                   // addps         0x15f18(%rip),%xmm0        # 36420 <_sk_srcover_bgra_8888_sse2_lowp+0x1118>
-  .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,200                           // movaps        %xmm0,%xmm1
-  .byte  15,40,208                           // movaps        %xmm0,%xmm2
-  .byte  15,40,216                           // movaps        %xmm0,%xmm3
-  .byte  255,224                             // jmpq          *%rax
-
 HIDDEN _sk_gradient_sse41
 .globl _sk_gradient_sse41
 FUNCTION(_sk_gradient_sse41)
@@ -40851,7 +40832,7 @@
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  102,15,239,201                      // pxor          %xmm1,%xmm1
   .byte  73,131,248,2                        // cmp           $0x2,%r8
-  .byte  114,41                              // jb            2054d <_sk_gradient_sse41+0x38>
+  .byte  114,41                              // jb            20511 <_sk_gradient_sse41+0x38>
   .byte  76,139,72,72                        // mov           0x48(%rax),%r9
   .byte  73,255,200                          // dec           %r8
   .byte  73,131,193,4                        // add           $0x4,%r9
@@ -40862,7 +40843,7 @@
   .byte  102,15,250,202                      // psubd         %xmm2,%xmm1
   .byte  73,131,193,4                        // add           $0x4,%r9
   .byte  73,255,200                          // dec           %r8
-  .byte  117,230                             // jne           20533 <_sk_gradient_sse41+0x1e>
+  .byte  117,230                             // jne           204f7 <_sk_gradient_sse41+0x1e>
   .byte  65,86                               // push          %r14
   .byte  83                                  // push          %rbx
   .byte  102,73,15,58,22,200,1               // pextrq        $0x1,%xmm1,%r8
@@ -40993,26 +40974,26 @@
   .byte  69,15,94,226                        // divps         %xmm10,%xmm12
   .byte  69,15,40,236                        // movaps        %xmm12,%xmm13
   .byte  69,15,89,237                        // mulps         %xmm13,%xmm13
-  .byte  68,15,40,21,143,92,1,0              // movaps        0x15c8f(%rip),%xmm10        # 36430 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
+  .byte  68,15,40,21,123,92,1,0              // movaps        0x15c7b(%rip),%xmm10        # 363e0 <_sk_srcover_bgra_8888_sse2_lowp+0x10d8>
   .byte  69,15,89,213                        // mulps         %xmm13,%xmm10
-  .byte  68,15,88,21,147,92,1,0              // addps         0x15c93(%rip),%xmm10        # 36440 <_sk_srcover_bgra_8888_sse2_lowp+0x1138>
+  .byte  68,15,88,21,127,92,1,0              // addps         0x15c7f(%rip),%xmm10        # 363f0 <_sk_srcover_bgra_8888_sse2_lowp+0x10e8>
   .byte  69,15,89,213                        // mulps         %xmm13,%xmm10
-  .byte  68,15,88,21,151,92,1,0              // addps         0x15c97(%rip),%xmm10        # 36450 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  .byte  68,15,88,21,131,92,1,0              // addps         0x15c83(%rip),%xmm10        # 36400 <_sk_srcover_bgra_8888_sse2_lowp+0x10f8>
   .byte  69,15,89,213                        // mulps         %xmm13,%xmm10
-  .byte  68,15,88,21,155,92,1,0              // addps         0x15c9b(%rip),%xmm10        # 36460 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
+  .byte  68,15,88,21,135,92,1,0              // addps         0x15c87(%rip),%xmm10        # 36410 <_sk_srcover_bgra_8888_sse2_lowp+0x1108>
   .byte  69,15,89,212                        // mulps         %xmm12,%xmm10
   .byte  65,15,194,195,1                     // cmpltps       %xmm11,%xmm0
-  .byte  68,15,40,29,154,92,1,0              // movaps        0x15c9a(%rip),%xmm11        # 36470 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
+  .byte  68,15,40,29,134,92,1,0              // movaps        0x15c86(%rip),%xmm11        # 36420 <_sk_srcover_bgra_8888_sse2_lowp+0x1118>
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
   .byte  102,69,15,56,20,211                 // blendvps      %xmm0,%xmm11,%xmm10
   .byte  69,15,194,200,1                     // cmpltps       %xmm8,%xmm9
-  .byte  68,15,40,29,83,86,1,0               // movaps        0x15653(%rip),%xmm11        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,40,29,143,86,1,0              // movaps        0x1568f(%rip),%xmm11        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  102,69,15,56,20,211                 // blendvps      %xmm0,%xmm11,%xmm10
   .byte  15,40,193                           // movaps        %xmm1,%xmm0
   .byte  65,15,194,192,1                     // cmpltps       %xmm8,%xmm0
-  .byte  68,15,40,13,69,86,1,0               // movaps        0x15645(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  68,15,40,13,129,86,1,0              // movaps        0x15681(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,92,202                        // subps         %xmm10,%xmm9
   .byte  102,69,15,56,20,209                 // blendvps      %xmm0,%xmm9,%xmm10
   .byte  69,15,194,194,7                     // cmpordps      %xmm10,%xmm8
@@ -41046,7 +41027,7 @@
   .byte  243,69,15,89,203                    // mulss         %xmm11,%xmm9
   .byte  69,15,198,201,0                     // shufps        $0x0,%xmm9,%xmm9
   .byte  68,15,88,200                        // addps         %xmm0,%xmm9
-  .byte  68,15,89,13,14,92,1,0               // mulps         0x15c0e(%rip),%xmm9        # 36480 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
+  .byte  68,15,89,13,250,91,1,0              // mulps         0x15bfa(%rip),%xmm9        # 36430 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
   .byte  15,89,192                           // mulps         %xmm0,%xmm0
   .byte  68,15,40,225                        // movaps        %xmm1,%xmm12
   .byte  69,15,89,228                        // mulps         %xmm12,%xmm12
@@ -41054,7 +41035,7 @@
   .byte  243,69,15,89,219                    // mulss         %xmm11,%xmm11
   .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
   .byte  69,15,92,227                        // subps         %xmm11,%xmm12
-  .byte  68,15,89,21,249,91,1,0              // mulps         0x15bf9(%rip),%xmm10        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
+  .byte  68,15,89,21,229,91,1,0              // mulps         0x15be5(%rip),%xmm10        # 36440 <_sk_srcover_bgra_8888_sse2_lowp+0x1138>
   .byte  69,15,89,212                        // mulps         %xmm12,%xmm10
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  15,89,192                           // mulps         %xmm0,%xmm0
@@ -41063,8 +41044,8 @@
   .byte  69,15,198,192,0                     // shufps        $0x0,%xmm8,%xmm8
   .byte  65,15,40,194                        // movaps        %xmm10,%xmm0
   .byte  65,15,92,193                        // subps         %xmm9,%xmm0
-  .byte  68,15,87,13,161,90,1,0              // xorps         0x15aa1(%rip),%xmm9        # 36360 <_sk_srcover_bgra_8888_sse2_lowp+0x1058>
-  .byte  68,15,89,5,121,85,1,0               // mulps         0x15579(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,87,13,221,90,1,0              // xorps         0x15add(%rip),%xmm9        # 36360 <_sk_srcover_bgra_8888_sse2_lowp+0x1058>
+  .byte  68,15,89,5,181,85,1,0               // mulps         0x155b5(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  69,15,92,202                        // subps         %xmm10,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
@@ -41085,7 +41066,7 @@
   .byte  243,69,15,89,203                    // mulss         %xmm11,%xmm9
   .byte  69,15,198,201,0                     // shufps        $0x0,%xmm9,%xmm9
   .byte  68,15,88,200                        // addps         %xmm0,%xmm9
-  .byte  68,15,89,13,112,91,1,0              // mulps         0x15b70(%rip),%xmm9        # 36480 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
+  .byte  68,15,89,13,92,91,1,0               // mulps         0x15b5c(%rip),%xmm9        # 36430 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
   .byte  15,89,192                           // mulps         %xmm0,%xmm0
   .byte  68,15,40,225                        // movaps        %xmm1,%xmm12
   .byte  69,15,89,228                        // mulps         %xmm12,%xmm12
@@ -41093,7 +41074,7 @@
   .byte  243,69,15,89,219                    // mulss         %xmm11,%xmm11
   .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
   .byte  69,15,92,227                        // subps         %xmm11,%xmm12
-  .byte  68,15,89,21,91,91,1,0               // mulps         0x15b5b(%rip),%xmm10        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
+  .byte  68,15,89,21,71,91,1,0               // mulps         0x15b47(%rip),%xmm10        # 36440 <_sk_srcover_bgra_8888_sse2_lowp+0x1138>
   .byte  69,15,89,212                        // mulps         %xmm12,%xmm10
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  15,89,192                           // mulps         %xmm0,%xmm0
@@ -41102,8 +41083,8 @@
   .byte  69,15,198,192,0                     // shufps        $0x0,%xmm8,%xmm8
   .byte  65,15,40,194                        // movaps        %xmm10,%xmm0
   .byte  65,15,92,193                        // subps         %xmm9,%xmm0
-  .byte  68,15,87,13,3,90,1,0                // xorps         0x15a03(%rip),%xmm9        # 36360 <_sk_srcover_bgra_8888_sse2_lowp+0x1058>
-  .byte  68,15,89,5,219,84,1,0               // mulps         0x154db(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,87,13,63,90,1,0               // xorps         0x15a3f(%rip),%xmm9        # 36360 <_sk_srcover_bgra_8888_sse2_lowp+0x1058>
+  .byte  68,15,89,5,23,85,1,0                // mulps         0x15517(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  69,15,92,202                        // subps         %xmm10,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
@@ -41121,7 +41102,7 @@
   .byte  243,69,15,89,200                    // mulss         %xmm8,%xmm9
   .byte  69,15,198,201,0                     // shufps        $0x0,%xmm9,%xmm9
   .byte  68,15,88,200                        // addps         %xmm0,%xmm9
-  .byte  68,15,89,13,227,90,1,0              // mulps         0x15ae3(%rip),%xmm9        # 36480 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
+  .byte  68,15,89,13,207,90,1,0              // mulps         0x15acf(%rip),%xmm9        # 36430 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
   .byte  15,89,192                           // mulps         %xmm0,%xmm0
   .byte  68,15,40,209                        // movaps        %xmm1,%xmm10
   .byte  69,15,89,210                        // mulps         %xmm10,%xmm10
@@ -41129,7 +41110,7 @@
   .byte  243,69,15,89,192                    // mulss         %xmm8,%xmm8
   .byte  69,15,198,192,0                     // shufps        $0x0,%xmm8,%xmm8
   .byte  65,15,92,192                        // subps         %xmm8,%xmm0
-  .byte  15,87,5,159,89,1,0                  // xorps         0x1599f(%rip),%xmm0        # 36360 <_sk_srcover_bgra_8888_sse2_lowp+0x1058>
+  .byte  15,87,5,219,89,1,0                  // xorps         0x159db(%rip),%xmm0        # 36360 <_sk_srcover_bgra_8888_sse2_lowp+0x1058>
   .byte  65,15,94,193                        // divps         %xmm9,%xmm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -41177,7 +41158,7 @@
 FUNCTION(_sk_save_xy_sse41)
 _sk_save_xy_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,254,83,1,0               // movaps        0x153fe(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,40,5,58,84,1,0                // movaps        0x1543a(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  15,17,0                             // movups        %xmm0,(%rax)
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,88,200                        // addps         %xmm8,%xmm9
@@ -41221,8 +41202,8 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,128,128,0,0,0              // movups        0x80(%rax),%xmm8
-  .byte  15,88,5,199,89,1,0                  // addps         0x159c7(%rip),%xmm0        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  .byte  68,15,40,13,111,83,1,0              // movaps        0x1536f(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  15,88,5,179,89,1,0                  // addps         0x159b3(%rip),%xmm0        # 36450 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  .byte  68,15,40,13,171,83,1,0              // movaps        0x153ab(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  68,15,17,136,0,1,0,0                // movups        %xmm9,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -41235,7 +41216,7 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,128,128,0,0,0              // movups        0x80(%rax),%xmm8
-  .byte  15,88,5,59,83,1,0                   // addps         0x1533b(%rip),%xmm0        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  15,88,5,119,83,1,0                  // addps         0x15377(%rip),%xmm0        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  68,15,17,128,0,1,0,0                // movups        %xmm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -41247,8 +41228,8 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,64                         // movups        0x40(%rax),%xmm1
   .byte  68,15,16,128,192,0,0,0              // movups        0xc0(%rax),%xmm8
-  .byte  15,88,13,122,89,1,0                 // addps         0x1597a(%rip),%xmm1        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  .byte  68,15,40,13,34,83,1,0               // movaps        0x15322(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  15,88,13,102,89,1,0                 // addps         0x15966(%rip),%xmm1        # 36450 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  .byte  68,15,40,13,94,83,1,0               // movaps        0x1535e(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  68,15,17,136,64,1,0,0               // movups        %xmm9,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -41261,7 +41242,7 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,64                         // movups        0x40(%rax),%xmm1
   .byte  68,15,16,128,192,0,0,0              // movups        0xc0(%rax),%xmm8
-  .byte  15,88,13,237,82,1,0                 // addps         0x152ed(%rip),%xmm1        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  15,88,13,41,83,1,0                  // addps         0x15329(%rip),%xmm1        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  68,15,17,128,64,1,0,0               // movups        %xmm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -41273,13 +41254,13 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,128,128,0,0,0              // movups        0x80(%rax),%xmm8
-  .byte  15,88,5,61,89,1,0                   // addps         0x1593d(%rip),%xmm0        # 364b0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
-  .byte  68,15,40,13,213,82,1,0              // movaps        0x152d5(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  15,88,5,41,89,1,0                   // addps         0x15929(%rip),%xmm0        # 36460 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
+  .byte  68,15,40,13,17,83,1,0               // movaps        0x15311(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  69,15,40,193                        // movaps        %xmm9,%xmm8
   .byte  69,15,89,192                        // mulps         %xmm8,%xmm8
-  .byte  68,15,89,13,49,89,1,0               // mulps         0x15931(%rip),%xmm9        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
-  .byte  68,15,88,13,185,84,1,0              // addps         0x154b9(%rip),%xmm9        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
+  .byte  68,15,89,13,29,89,1,0               // mulps         0x1591d(%rip),%xmm9        # 36470 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
+  .byte  68,15,88,13,245,84,1,0              // addps         0x154f5(%rip),%xmm9        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  68,15,17,136,0,1,0,0                // movups        %xmm9,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -41292,16 +41273,16 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,128,128,0,0,0              // movups        0x80(%rax),%xmm8
-  .byte  15,88,5,229,88,1,0                  // addps         0x158e5(%rip),%xmm0        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  .byte  68,15,40,13,141,82,1,0              // movaps        0x1528d(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  15,88,5,209,88,1,0                  // addps         0x158d1(%rip),%xmm0        # 36450 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  .byte  68,15,40,13,201,82,1,0              // movaps        0x152c9(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
-  .byte  68,15,40,5,1,89,1,0                 // movaps        0x15901(%rip),%xmm8        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  .byte  68,15,40,5,237,88,1,0               // movaps        0x158ed(%rip),%xmm8        # 36480 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,5,89,1,0                 // addps         0x15905(%rip),%xmm8        # 364e0 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  .byte  68,15,88,5,241,88,1,0               // addps         0x158f1(%rip),%xmm8        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,89,82,1,0                // addps         0x15259(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,88,5,149,82,1,0               // addps         0x15295(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,253,88,1,0               // addps         0x158fd(%rip),%xmm8        # 364f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  .byte  68,15,88,5,233,88,1,0               // addps         0x158e9(%rip),%xmm8        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   .byte  68,15,17,128,0,1,0,0                // movups        %xmm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -41311,17 +41292,17 @@
 FUNCTION(_sk_bicubic_p1x_sse41)
 _sk_bicubic_p1x_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,55,82,1,0                // movaps        0x15237(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,40,5,115,82,1,0               // movaps        0x15273(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,136,128,0,0,0              // movups        0x80(%rax),%xmm9
   .byte  65,15,88,192                        // addps         %xmm8,%xmm0
-  .byte  68,15,40,21,176,88,1,0              // movaps        0x158b0(%rip),%xmm10        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  .byte  68,15,40,21,156,88,1,0              // movaps        0x1589c(%rip),%xmm10        # 36480 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,180,88,1,0              // addps         0x158b4(%rip),%xmm10        # 364e0 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  .byte  68,15,88,21,160,88,1,0              // addps         0x158a0(%rip),%xmm10        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
   .byte  69,15,88,208                        // addps         %xmm8,%xmm10
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,176,88,1,0              // addps         0x158b0(%rip),%xmm10        # 364f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  .byte  68,15,88,21,156,88,1,0              // addps         0x1589c(%rip),%xmm10        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   .byte  68,15,17,144,0,1,0,0                // movups        %xmm10,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -41333,11 +41314,11 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,128,128,0,0,0              // movups        0x80(%rax),%xmm8
-  .byte  15,88,5,128,88,1,0                  // addps         0x15880(%rip),%xmm0        # 364e0 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  .byte  15,88,5,108,88,1,0                  // addps         0x1586c(%rip),%xmm0        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  69,15,89,201                        // mulps         %xmm9,%xmm9
-  .byte  68,15,89,5,80,88,1,0                // mulps         0x15850(%rip),%xmm8        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
-  .byte  68,15,88,5,216,83,1,0               // addps         0x153d8(%rip),%xmm8        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
+  .byte  68,15,89,5,60,88,1,0                // mulps         0x1583c(%rip),%xmm8        # 36470 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
+  .byte  68,15,88,5,20,84,1,0                // addps         0x15414(%rip),%xmm8        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  68,15,17,128,0,1,0,0                // movups        %xmm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -41350,13 +41331,13 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,64                         // movups        0x40(%rax),%xmm1
   .byte  68,15,16,128,192,0,0,0              // movups        0xc0(%rax),%xmm8
-  .byte  15,88,13,19,88,1,0                  // addps         0x15813(%rip),%xmm1        # 364b0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
-  .byte  68,15,40,13,171,81,1,0              // movaps        0x151ab(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  15,88,13,255,87,1,0                 // addps         0x157ff(%rip),%xmm1        # 36460 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
+  .byte  68,15,40,13,231,81,1,0              // movaps        0x151e7(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  69,15,40,193                        // movaps        %xmm9,%xmm8
   .byte  69,15,89,192                        // mulps         %xmm8,%xmm8
-  .byte  68,15,89,13,7,88,1,0                // mulps         0x15807(%rip),%xmm9        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
-  .byte  68,15,88,13,143,83,1,0              // addps         0x1538f(%rip),%xmm9        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
+  .byte  68,15,89,13,243,87,1,0              // mulps         0x157f3(%rip),%xmm9        # 36470 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
+  .byte  68,15,88,13,203,83,1,0              // addps         0x153cb(%rip),%xmm9        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  68,15,17,136,64,1,0,0               // movups        %xmm9,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -41369,16 +41350,16 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,64                         // movups        0x40(%rax),%xmm1
   .byte  68,15,16,128,192,0,0,0              // movups        0xc0(%rax),%xmm8
-  .byte  15,88,13,186,87,1,0                 // addps         0x157ba(%rip),%xmm1        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  .byte  68,15,40,13,98,81,1,0               // movaps        0x15162(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  15,88,13,166,87,1,0                 // addps         0x157a6(%rip),%xmm1        # 36450 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  .byte  68,15,40,13,158,81,1,0              // movaps        0x1519e(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
-  .byte  68,15,40,5,214,87,1,0               // movaps        0x157d6(%rip),%xmm8        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  .byte  68,15,40,5,194,87,1,0               // movaps        0x157c2(%rip),%xmm8        # 36480 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,218,87,1,0               // addps         0x157da(%rip),%xmm8        # 364e0 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  .byte  68,15,88,5,198,87,1,0               // addps         0x157c6(%rip),%xmm8        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,46,81,1,0                // addps         0x1512e(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,88,5,106,81,1,0               // addps         0x1516a(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,210,87,1,0               // addps         0x157d2(%rip),%xmm8        # 364f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  .byte  68,15,88,5,190,87,1,0               // addps         0x157be(%rip),%xmm8        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   .byte  68,15,17,128,64,1,0,0               // movups        %xmm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -41388,17 +41369,17 @@
 FUNCTION(_sk_bicubic_p1y_sse41)
 _sk_bicubic_p1y_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,12,81,1,0                // movaps        0x1510c(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,40,5,72,81,1,0                // movaps        0x15148(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  15,16,72,64                         // movups        0x40(%rax),%xmm1
   .byte  68,15,16,136,192,0,0,0              // movups        0xc0(%rax),%xmm9
   .byte  65,15,88,200                        // addps         %xmm8,%xmm1
-  .byte  68,15,40,21,132,87,1,0              // movaps        0x15784(%rip),%xmm10        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  .byte  68,15,40,21,112,87,1,0              // movaps        0x15770(%rip),%xmm10        # 36480 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,136,87,1,0              // addps         0x15788(%rip),%xmm10        # 364e0 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  .byte  68,15,88,21,116,87,1,0              // addps         0x15774(%rip),%xmm10        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
   .byte  69,15,88,208                        // addps         %xmm8,%xmm10
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,132,87,1,0              // addps         0x15784(%rip),%xmm10        # 364f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  .byte  68,15,88,21,112,87,1,0              // addps         0x15770(%rip),%xmm10        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   .byte  68,15,17,144,64,1,0,0               // movups        %xmm10,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -41410,11 +41391,11 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,64                         // movups        0x40(%rax),%xmm1
   .byte  68,15,16,128,192,0,0,0              // movups        0xc0(%rax),%xmm8
-  .byte  15,88,13,83,87,1,0                  // addps         0x15753(%rip),%xmm1        # 364e0 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  .byte  15,88,13,63,87,1,0                  // addps         0x1573f(%rip),%xmm1        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  69,15,89,201                        // mulps         %xmm9,%xmm9
-  .byte  68,15,89,5,35,87,1,0                // mulps         0x15723(%rip),%xmm8        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
-  .byte  68,15,88,5,171,82,1,0               // addps         0x152ab(%rip),%xmm8        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
+  .byte  68,15,89,5,15,87,1,0                // mulps         0x1570f(%rip),%xmm8        # 36470 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
+  .byte  68,15,88,5,231,82,1,0               // addps         0x152e7(%rip),%xmm8        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  68,15,17,128,64,1,0,0               // movups        %xmm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -41525,7 +41506,7 @@
   .byte  15,89,194                           // mulps         %xmm2,%xmm0
   .byte  15,41,68,36,208                     // movaps        %xmm0,-0x30(%rsp)
   .byte  243,15,91,240                       // cvttps2dq     %xmm0,%xmm6
-  .byte  15,40,37,14,86,1,0                  // movaps        0x1560e(%rip),%xmm4        # 36500 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  .byte  15,40,37,250,85,1,0                 // movaps        0x155fa(%rip),%xmm4        # 364b0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   .byte  15,88,196                           // addps         %xmm4,%xmm0
   .byte  15,41,68,36,176                     // movaps        %xmm0,-0x50(%rsp)
   .byte  102,65,15,110,208                   // movd          %r8d,%xmm2
@@ -41564,7 +41545,7 @@
   .byte  102,68,15,56,64,192                 // pmulld        %xmm0,%xmm8
   .byte  102,65,15,111,216                   // movdqa        %xmm8,%xmm3
   .byte  102,15,254,218                      // paddd         %xmm2,%xmm3
-  .byte  102,68,15,111,37,101,85,1,0         // movdqa        0x15565(%rip),%xmm12        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,68,15,111,37,81,85,1,0          // movdqa        0x15551(%rip),%xmm12        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,65,15,56,64,220                 // pmulld        %xmm12,%xmm3
   .byte  102,15,118,228                      // pcmpeqd       %xmm4,%xmm4
   .byte  102,15,111,203                      // movdqa        %xmm3,%xmm1
@@ -41587,7 +41568,7 @@
   .byte  102,65,15,58,22,217,1               // pextrd        $0x1,%xmm3,%r9d
   .byte  102,65,15,58,22,218,2               // pextrd        $0x2,%xmm3,%r10d
   .byte  102,65,15,58,22,219,3               // pextrd        $0x3,%xmm3,%r11d
-  .byte  102,15,111,37,89,78,1,0             // movdqa        0x14e59(%rip),%xmm4        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,111,37,149,78,1,0            // movdqa        0x14e95(%rip),%xmm4        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,15,254,220                      // paddd         %xmm4,%xmm3
   .byte  102,15,111,252                      // movdqa        %xmm4,%xmm7
   .byte  102,73,15,58,22,222,1               // pextrq        $0x1,%xmm3,%r14
@@ -41760,7 +41741,7 @@
   .byte  102,68,15,254,202                   // paddd         %xmm2,%xmm9
   .byte  102,65,15,111,192                   // movdqa        %xmm8,%xmm0
   .byte  102,65,15,254,193                   // paddd         %xmm9,%xmm0
-  .byte  102,15,111,45,116,81,1,0            // movdqa        0x15174(%rip),%xmm5        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,15,111,45,96,81,1,0             // movdqa        0x15160(%rip),%xmm5        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,15,56,64,197                    // pmulld        %xmm5,%xmm0
   .byte  102,15,111,200                      // movdqa        %xmm0,%xmm1
   .byte  102,15,118,246                      // pcmpeqd       %xmm6,%xmm6
@@ -41781,7 +41762,7 @@
   .byte  102,65,15,58,22,193,1               // pextrd        $0x1,%xmm0,%r9d
   .byte  102,65,15,58,22,194,2               // pextrd        $0x2,%xmm0,%r10d
   .byte  102,65,15,58,22,195,3               // pextrd        $0x3,%xmm0,%r11d
-  .byte  102,15,111,61,117,74,1,0            // movdqa        0x14a75(%rip),%xmm7        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,111,61,177,74,1,0            // movdqa        0x14ab1(%rip),%xmm7        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,15,254,199                      // paddd         %xmm7,%xmm0
   .byte  102,72,15,58,22,193,1               // pextrq        $0x1,%xmm0,%rcx
   .byte  102,72,15,126,195                   // movq          %xmm0,%rbx
@@ -41847,7 +41828,7 @@
   .byte  15,88,248                           // addps         %xmm0,%xmm7
   .byte  102,68,15,254,210                   // paddd         %xmm2,%xmm10
   .byte  102,69,15,254,194                   // paddd         %xmm10,%xmm8
-  .byte  102,15,111,13,176,79,1,0            // movdqa        0x14fb0(%rip),%xmm1        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,15,111,13,156,79,1,0            // movdqa        0x14f9c(%rip),%xmm1        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,68,15,56,64,193                 // pmulld        %xmm1,%xmm8
   .byte  102,65,15,111,192                   // movdqa        %xmm8,%xmm0
   .byte  102,15,118,237                      // pcmpeqd       %xmm5,%xmm5
@@ -41868,7 +41849,7 @@
   .byte  102,69,15,58,22,193,1               // pextrd        $0x1,%xmm8,%r9d
   .byte  102,69,15,58,22,194,2               // pextrd        $0x2,%xmm8,%r10d
   .byte  102,69,15,58,22,195,3               // pextrd        $0x3,%xmm8,%r11d
-  .byte  102,15,111,21,171,72,1,0            // movdqa        0x148ab(%rip),%xmm2        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,111,21,231,72,1,0            // movdqa        0x148e7(%rip),%xmm2        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,68,15,254,194                   // paddd         %xmm2,%xmm8
   .byte  102,76,15,58,22,193,1               // pextrq        $0x1,%xmm8,%rcx
   .byte  102,76,15,126,195                   // movq          %xmm8,%rbx
@@ -41995,7 +41976,7 @@
   .byte  15,89,203                           // mulps         %xmm3,%xmm1
   .byte  15,41,76,36,48                      // movaps        %xmm1,0x30(%rsp)
   .byte  243,15,91,249                       // cvttps2dq     %xmm1,%xmm7
-  .byte  15,40,29,14,77,1,0                  // movaps        0x14d0e(%rip),%xmm3        # 36500 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  .byte  15,40,29,250,76,1,0                 // movaps        0x14cfa(%rip),%xmm3        # 364b0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   .byte  15,88,203                           // addps         %xmm3,%xmm1
   .byte  15,41,76,36,160                     // movaps        %xmm1,-0x60(%rsp)
   .byte  102,65,15,110,232                   // movd          %r8d,%xmm5
@@ -42052,7 +42033,7 @@
   .byte  102,68,15,56,64,253                 // pmulld        %xmm5,%xmm15
   .byte  102,65,15,111,223                   // movdqa        %xmm15,%xmm3
   .byte  102,15,254,217                      // paddd         %xmm1,%xmm3
-  .byte  102,68,15,111,29,13,76,1,0          // movdqa        0x14c0d(%rip),%xmm11        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,68,15,111,29,249,75,1,0         // movdqa        0x14bf9(%rip),%xmm11        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,65,15,56,64,219                 // pmulld        %xmm11,%xmm3
   .byte  102,15,118,192                      // pcmpeqd       %xmm0,%xmm0
   .byte  102,15,111,243                      // movdqa        %xmm3,%xmm6
@@ -42074,7 +42055,7 @@
   .byte  102,65,15,58,22,217,1               // pextrd        $0x1,%xmm3,%r9d
   .byte  102,65,15,58,22,218,2               // pextrd        $0x2,%xmm3,%r10d
   .byte  102,65,15,58,22,219,3               // pextrd        $0x3,%xmm3,%r11d
-  .byte  102,68,15,111,21,5,69,1,0           // movdqa        0x14505(%rip),%xmm10        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,68,15,111,21,65,69,1,0          // movdqa        0x14541(%rip),%xmm10        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,65,15,254,218                   // paddd         %xmm10,%xmm3
   .byte  102,73,15,58,22,222,1               // pextrq        $0x1,%xmm3,%r14
   .byte  102,72,15,126,219                   // movq          %xmm3,%rbx
@@ -42092,7 +42073,7 @@
   .byte  102,65,15,254,201                   // paddd         %xmm9,%xmm1
   .byte  102,65,15,56,64,203                 // pmulld        %xmm11,%xmm1
   .byte  102,15,111,217                      // movdqa        %xmm1,%xmm3
-  .byte  102,15,250,29,67,75,1,0             // psubd         0x14b43(%rip),%xmm3        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,29,47,75,1,0             // psubd         0x14b2f(%rip),%xmm3        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,73,15,58,22,222,1               // pextrq        $0x1,%xmm3,%r14
   .byte  102,72,15,126,219                   // movq          %xmm3,%rbx
   .byte  65,137,223                          // mov           %ebx,%r15d
@@ -42147,7 +42128,7 @@
   .byte  102,65,15,254,199                   // paddd         %xmm15,%xmm0
   .byte  102,65,15,56,64,195                 // pmulld        %xmm11,%xmm0
   .byte  102,15,111,232                      // movdqa        %xmm0,%xmm5
-  .byte  102,15,250,45,35,74,1,0             // psubd         0x14a23(%rip),%xmm5        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,45,15,74,1,0             // psubd         0x14a0f(%rip),%xmm5        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,69,15,118,192                   // pcmpeqd       %xmm8,%xmm8
   .byte  102,72,15,58,22,233,1               // pextrq        $0x1,%xmm5,%rcx
   .byte  102,72,15,126,237                   // movq          %xmm5,%rbp
@@ -42272,7 +42253,7 @@
   .byte  72,193,233,32                       // shr           $0x20,%rcx
   .byte  243,15,16,36,136                    // movss         (%rax,%rcx,4),%xmm4
   .byte  102,68,15,58,33,220,48              // insertps      $0x30,%xmm4,%xmm11
-  .byte  102,15,111,61,247,64,1,0            // movdqa        0x140f7(%rip),%xmm7        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,111,61,51,65,1,0             // movdqa        0x14133(%rip),%xmm7        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,15,254,223                      // paddd         %xmm7,%xmm3
   .byte  102,72,15,58,22,217,1               // pextrq        $0x1,%xmm3,%rcx
   .byte  102,72,15,126,221                   // movq          %xmm3,%rbp
@@ -42358,7 +42339,7 @@
   .byte  102,65,15,58,22,201,1               // pextrd        $0x1,%xmm1,%r9d
   .byte  102,65,15,58,22,202,2               // pextrd        $0x2,%xmm1,%r10d
   .byte  102,65,15,58,22,203,3               // pextrd        $0x3,%xmm1,%r11d
-  .byte  102,68,15,111,21,59,63,1,0          // movdqa        0x13f3b(%rip),%xmm10        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,68,15,111,21,119,63,1,0         // movdqa        0x13f77(%rip),%xmm10        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,65,15,254,202                   // paddd         %xmm10,%xmm1
   .byte  102,72,15,58,22,201,1               // pextrq        $0x1,%xmm1,%rcx
   .byte  102,72,15,126,203                   // movq          %xmm1,%rbx
@@ -42454,7 +42435,7 @@
   .byte  102,15,111,124,36,192               // movdqa        -0x40(%rsp),%xmm7
   .byte  102,15,111,199                      // movdqa        %xmm7,%xmm0
   .byte  102,15,254,195                      // paddd         %xmm3,%xmm0
-  .byte  102,15,111,21,229,67,1,0            // movdqa        0x143e5(%rip),%xmm2        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,15,111,21,209,67,1,0            // movdqa        0x143d1(%rip),%xmm2        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,15,56,64,194                    // pmulld        %xmm2,%xmm0
   .byte  102,15,111,200                      // movdqa        %xmm0,%xmm1
   .byte  102,69,15,118,192                   // pcmpeqd       %xmm8,%xmm8
@@ -42475,7 +42456,7 @@
   .byte  102,65,15,58,22,193,1               // pextrd        $0x1,%xmm0,%r9d
   .byte  102,65,15,58,22,194,2               // pextrd        $0x2,%xmm0,%r10d
   .byte  102,65,15,58,22,195,3               // pextrd        $0x3,%xmm0,%r11d
-  .byte  102,15,111,37,228,60,1,0            // movdqa        0x13ce4(%rip),%xmm4        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,111,37,32,61,1,0             // movdqa        0x13d20(%rip),%xmm4        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,15,254,196                      // paddd         %xmm4,%xmm0
   .byte  102,72,15,58,22,193,1               // pextrq        $0x1,%xmm0,%rcx
   .byte  102,72,15,126,195                   // movq          %xmm0,%rbx
@@ -42547,7 +42528,7 @@
   .byte  102,68,15,111,226                   // movdqa        %xmm2,%xmm12
   .byte  102,65,15,56,64,204                 // pmulld        %xmm12,%xmm1
   .byte  102,15,111,209                      // movdqa        %xmm1,%xmm2
-  .byte  102,15,250,21,16,66,1,0             // psubd         0x14210(%rip),%xmm2        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,21,252,65,1,0            // psubd         0x141fc(%rip),%xmm2        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,72,15,58,22,209,1               // pextrq        $0x1,%xmm2,%rcx
   .byte  102,72,15,126,213                   // movq          %xmm2,%rbp
   .byte  137,235                             // mov           %ebp,%ebx
@@ -42581,7 +42562,7 @@
   .byte  102,65,15,111,249                   // movdqa        %xmm9,%xmm7
   .byte  102,69,15,56,64,236                 // pmulld        %xmm12,%xmm13
   .byte  102,65,15,111,205                   // movdqa        %xmm13,%xmm1
-  .byte  102,15,250,13,92,65,1,0             // psubd         0x1415c(%rip),%xmm1        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,13,72,65,1,0             // psubd         0x14148(%rip),%xmm1        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,72,15,58,22,201,1               // pextrq        $0x1,%xmm1,%rcx
   .byte  102,72,15,126,203                   // movq          %xmm1,%rbx
   .byte  137,221                             // mov           %ebx,%ebp
@@ -42646,7 +42627,7 @@
   .byte  102,65,15,111,206                   // movdqa        %xmm14,%xmm1
   .byte  102,15,111,108,36,192               // movdqa        -0x40(%rsp),%xmm5
   .byte  102,15,254,205                      // paddd         %xmm5,%xmm1
-  .byte  102,15,111,37,243,63,1,0            // movdqa        0x13ff3(%rip),%xmm4        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,15,111,37,223,63,1,0            // movdqa        0x13fdf(%rip),%xmm4        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,15,56,64,204                    // pmulld        %xmm4,%xmm1
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,15,118,246                      // pcmpeqd       %xmm6,%xmm6
@@ -42667,7 +42648,7 @@
   .byte  102,65,15,58,22,201,1               // pextrd        $0x1,%xmm1,%r9d
   .byte  102,65,15,58,22,202,2               // pextrd        $0x2,%xmm1,%r10d
   .byte  102,65,15,58,22,203,3               // pextrd        $0x3,%xmm1,%r11d
-  .byte  102,15,111,29,244,56,1,0            // movdqa        0x138f4(%rip),%xmm3        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,111,29,48,57,1,0             // movdqa        0x13930(%rip),%xmm3        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,15,254,203                      // paddd         %xmm3,%xmm1
   .byte  102,72,15,58,22,201,1               // pextrq        $0x1,%xmm1,%rcx
   .byte  102,72,15,126,203                   // movq          %xmm1,%rbx
@@ -42739,7 +42720,7 @@
   .byte  102,15,56,64,239                    // pmulld        %xmm7,%xmm5
   .byte  102,15,111,205                      // movdqa        %xmm5,%xmm1
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
-  .byte  102,15,250,5,36,62,1,0              // psubd         0x13e24(%rip),%xmm0        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,5,16,62,1,0              // psubd         0x13e10(%rip),%xmm0        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,72,15,58,22,193,1               // pextrq        $0x1,%xmm0,%rcx
   .byte  102,72,15,126,197                   // movq          %xmm0,%rbp
   .byte  137,235                             // mov           %ebp,%ebx
@@ -42757,7 +42738,7 @@
   .byte  102,65,15,58,22,193,1               // pextrd        $0x1,%xmm0,%r9d
   .byte  102,65,15,58,22,194,2               // pextrd        $0x2,%xmm0,%r10d
   .byte  102,65,15,58,22,195,3               // pextrd        $0x3,%xmm0,%r11d
-  .byte  102,15,254,5,34,55,1,0              // paddd         0x13722(%rip),%xmm0        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,254,5,94,55,1,0              // paddd         0x1375e(%rip),%xmm0        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,72,15,58,22,193,1               // pextrq        $0x1,%xmm0,%rcx
   .byte  102,72,15,126,195                   // movq          %xmm0,%rbx
   .byte  137,221                             // mov           %ebx,%ebp
@@ -42773,7 +42754,7 @@
   .byte  102,15,254,84,36,176                // paddd         -0x50(%rsp),%xmm2
   .byte  102,15,56,64,215                    // pmulld        %xmm7,%xmm2
   .byte  102,15,111,194                      // movdqa        %xmm2,%xmm0
-  .byte  102,15,250,5,107,61,1,0             // psubd         0x13d6b(%rip),%xmm0        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,5,87,61,1,0              // psubd         0x13d57(%rip),%xmm0        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,72,15,58,22,193,1               // pextrq        $0x1,%xmm0,%rcx
   .byte  102,72,15,126,195                   // movq          %xmm0,%rbx
   .byte  137,221                             // mov           %ebx,%ebp
@@ -42791,7 +42772,7 @@
   .byte  102,65,15,58,22,199,1               // pextrd        $0x1,%xmm0,%r15d
   .byte  102,65,15,58,22,196,2               // pextrd        $0x2,%xmm0,%r12d
   .byte  102,65,15,58,22,198,3               // pextrd        $0x3,%xmm0,%r14d
-  .byte  102,15,254,5,105,54,1,0             // paddd         0x13669(%rip),%xmm0        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,254,5,165,54,1,0             // paddd         0x136a5(%rip),%xmm0        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,72,15,58,22,195,1               // pextrq        $0x1,%xmm0,%rbx
   .byte  102,72,15,126,197                   // movq          %xmm0,%rbp
   .byte  137,233                             // mov           %ebp,%ecx
@@ -42854,7 +42835,7 @@
   .byte  15,89,212                           // mulps         %xmm4,%xmm2
   .byte  65,15,88,211                        // addps         %xmm11,%xmm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,38,53,1,0                  // movaps        0x13526(%rip),%xmm3        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  15,40,29,98,53,1,0                  // movaps        0x13562(%rip),%xmm3        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  72,139,76,36,248                    // mov           -0x8(%rsp),%rcx
   .byte  15,40,100,36,64                     // movaps        0x40(%rsp),%xmm4
   .byte  15,40,108,36,80                     // movaps        0x50(%rsp),%xmm5
@@ -42869,6 +42850,25 @@
   .byte  93                                  // pop           %rbp
   .byte  255,224                             // jmpq          *%rax
 
+HIDDEN _sk_gauss_a_to_rgba_sse41
+.globl _sk_gauss_a_to_rgba_sse41
+FUNCTION(_sk_gauss_a_to_rgba_sse41)
+_sk_gauss_a_to_rgba_sse41:
+  .byte  15,40,5,191,59,1,0                  // movaps        0x13bbf(%rip),%xmm0        # 364e0 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  .byte  15,89,195                           // mulps         %xmm3,%xmm0
+  .byte  15,88,5,197,59,1,0                  // addps         0x13bc5(%rip),%xmm0        # 364f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  .byte  15,89,195                           // mulps         %xmm3,%xmm0
+  .byte  15,88,5,203,59,1,0                  // addps         0x13bcb(%rip),%xmm0        # 36500 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  .byte  15,89,195                           // mulps         %xmm3,%xmm0
+  .byte  15,88,5,209,59,1,0                  // addps         0x13bd1(%rip),%xmm0        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  15,89,195                           // mulps         %xmm3,%xmm0
+  .byte  15,88,5,215,59,1,0                  // addps         0x13bd7(%rip),%xmm0        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  15,40,200                           // movaps        %xmm0,%xmm1
+  .byte  15,40,208                           // movaps        %xmm0,%xmm2
+  .byte  15,40,216                           // movaps        %xmm0,%xmm3
+  .byte  255,224                             // jmpq          *%rax
+
 HIDDEN _sk_start_pipeline_sse2
 .globl _sk_start_pipeline_sse2
 FUNCTION(_sk_start_pipeline_sse2)
@@ -49231,25 +49231,6 @@
   .byte  65,94                               // pop           %r14
   .byte  255,224                             // jmpq          *%rax
 
-HIDDEN _sk_gauss_a_to_rgba_sse2
-.globl _sk_gauss_a_to_rgba_sse2
-FUNCTION(_sk_gauss_a_to_rgba_sse2)
-_sk_gauss_a_to_rgba_sse2:
-  .byte  15,40,5,106,220,0,0                 // movaps        0xdc6a(%rip),%xmm0        # 363e0 <_sk_srcover_bgra_8888_sse2_lowp+0x10d8>
-  .byte  15,89,195                           // mulps         %xmm3,%xmm0
-  .byte  15,88,5,112,220,0,0                 // addps         0xdc70(%rip),%xmm0        # 363f0 <_sk_srcover_bgra_8888_sse2_lowp+0x10e8>
-  .byte  15,89,195                           // mulps         %xmm3,%xmm0
-  .byte  15,88,5,118,220,0,0                 // addps         0xdc76(%rip),%xmm0        # 36400 <_sk_srcover_bgra_8888_sse2_lowp+0x10f8>
-  .byte  15,89,195                           // mulps         %xmm3,%xmm0
-  .byte  15,88,5,124,220,0,0                 // addps         0xdc7c(%rip),%xmm0        # 36410 <_sk_srcover_bgra_8888_sse2_lowp+0x1108>
-  .byte  15,89,195                           // mulps         %xmm3,%xmm0
-  .byte  15,88,5,130,220,0,0                 // addps         0xdc82(%rip),%xmm0        # 36420 <_sk_srcover_bgra_8888_sse2_lowp+0x1118>
-  .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,200                           // movaps        %xmm0,%xmm1
-  .byte  15,40,208                           // movaps        %xmm0,%xmm2
-  .byte  15,40,216                           // movaps        %xmm0,%xmm3
-  .byte  255,224                             // jmpq          *%rax
-
 HIDDEN _sk_gradient_sse2
 .globl _sk_gradient_sse2
 FUNCTION(_sk_gradient_sse2)
@@ -49258,7 +49239,7 @@
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  102,15,239,201                      // pxor          %xmm1,%xmm1
   .byte  73,131,248,2                        // cmp           $0x2,%r8
-  .byte  114,41                              // jb            287e3 <_sk_gradient_sse2+0x38>
+  .byte  114,41                              // jb            287a7 <_sk_gradient_sse2+0x38>
   .byte  76,139,72,72                        // mov           0x48(%rax),%r9
   .byte  73,255,200                          // dec           %r8
   .byte  73,131,193,4                        // add           $0x4,%r9
@@ -49269,7 +49250,7 @@
   .byte  102,15,250,202                      // psubd         %xmm2,%xmm1
   .byte  73,131,193,4                        // add           $0x4,%r9
   .byte  73,255,200                          // dec           %r8
-  .byte  117,230                             // jne           287c9 <_sk_gradient_sse2+0x1e>
+  .byte  117,230                             // jne           2878d <_sk_gradient_sse2+0x1e>
   .byte  65,86                               // push          %r14
   .byte  83                                  // push          %rbx
   .byte  102,15,112,209,78                   // pshufd        $0x4e,%xmm1,%xmm2
@@ -49409,29 +49390,29 @@
   .byte  69,15,94,220                        // divps         %xmm12,%xmm11
   .byte  69,15,40,227                        // movaps        %xmm11,%xmm12
   .byte  69,15,89,228                        // mulps         %xmm12,%xmm12
-  .byte  68,15,40,45,18,218,0,0              // movaps        0xda12(%rip),%xmm13        # 36430 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
+  .byte  68,15,40,45,254,217,0,0             // movaps        0xd9fe(%rip),%xmm13        # 363e0 <_sk_srcover_bgra_8888_sse2_lowp+0x10d8>
   .byte  69,15,89,236                        // mulps         %xmm12,%xmm13
-  .byte  68,15,88,45,22,218,0,0              // addps         0xda16(%rip),%xmm13        # 36440 <_sk_srcover_bgra_8888_sse2_lowp+0x1138>
+  .byte  68,15,88,45,2,218,0,0               // addps         0xda02(%rip),%xmm13        # 363f0 <_sk_srcover_bgra_8888_sse2_lowp+0x10e8>
   .byte  69,15,89,236                        // mulps         %xmm12,%xmm13
-  .byte  68,15,88,45,26,218,0,0              // addps         0xda1a(%rip),%xmm13        # 36450 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  .byte  68,15,88,45,6,218,0,0               // addps         0xda06(%rip),%xmm13        # 36400 <_sk_srcover_bgra_8888_sse2_lowp+0x10f8>
   .byte  69,15,89,236                        // mulps         %xmm12,%xmm13
-  .byte  68,15,88,45,30,218,0,0              // addps         0xda1e(%rip),%xmm13        # 36460 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
+  .byte  68,15,88,45,10,218,0,0              // addps         0xda0a(%rip),%xmm13        # 36410 <_sk_srcover_bgra_8888_sse2_lowp+0x1108>
   .byte  69,15,89,235                        // mulps         %xmm11,%xmm13
   .byte  69,15,194,202,1                     // cmpltps       %xmm10,%xmm9
-  .byte  68,15,40,21,29,218,0,0              // movaps        0xda1d(%rip),%xmm10        # 36470 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
+  .byte  68,15,40,21,9,218,0,0               // movaps        0xda09(%rip),%xmm10        # 36420 <_sk_srcover_bgra_8888_sse2_lowp+0x1118>
   .byte  69,15,92,213                        // subps         %xmm13,%xmm10
   .byte  69,15,84,209                        // andps         %xmm9,%xmm10
   .byte  69,15,85,205                        // andnps        %xmm13,%xmm9
   .byte  69,15,86,202                        // orps          %xmm10,%xmm9
   .byte  68,15,194,192,1                     // cmpltps       %xmm0,%xmm8
-  .byte  68,15,40,21,208,211,0,0             // movaps        0xd3d0(%rip),%xmm10        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,40,21,12,212,0,0              // movaps        0xd40c(%rip),%xmm10        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  69,15,92,209                        // subps         %xmm9,%xmm10
   .byte  69,15,84,208                        // andps         %xmm8,%xmm10
   .byte  69,15,85,193                        // andnps        %xmm9,%xmm8
   .byte  69,15,86,194                        // orps          %xmm10,%xmm8
   .byte  68,15,40,201                        // movaps        %xmm1,%xmm9
   .byte  68,15,194,200,1                     // cmpltps       %xmm0,%xmm9
-  .byte  68,15,40,21,191,211,0,0             // movaps        0xd3bf(%rip),%xmm10        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  68,15,40,21,251,211,0,0             // movaps        0xd3fb(%rip),%xmm10        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,92,208                        // subps         %xmm8,%xmm10
   .byte  69,15,84,209                        // andps         %xmm9,%xmm10
   .byte  69,15,85,200                        // andnps        %xmm8,%xmm9
@@ -49466,7 +49447,7 @@
   .byte  243,69,15,89,203                    // mulss         %xmm11,%xmm9
   .byte  69,15,198,201,0                     // shufps        $0x0,%xmm9,%xmm9
   .byte  68,15,88,200                        // addps         %xmm0,%xmm9
-  .byte  68,15,89,13,134,217,0,0             // mulps         0xd986(%rip),%xmm9        # 36480 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
+  .byte  68,15,89,13,114,217,0,0             // mulps         0xd972(%rip),%xmm9        # 36430 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
   .byte  15,89,192                           // mulps         %xmm0,%xmm0
   .byte  68,15,40,225                        // movaps        %xmm1,%xmm12
   .byte  69,15,89,228                        // mulps         %xmm12,%xmm12
@@ -49474,7 +49455,7 @@
   .byte  243,69,15,89,219                    // mulss         %xmm11,%xmm11
   .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
   .byte  69,15,92,227                        // subps         %xmm11,%xmm12
-  .byte  68,15,89,21,113,217,0,0             // mulps         0xd971(%rip),%xmm10        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
+  .byte  68,15,89,21,93,217,0,0              // mulps         0xd95d(%rip),%xmm10        # 36440 <_sk_srcover_bgra_8888_sse2_lowp+0x1138>
   .byte  69,15,89,212                        // mulps         %xmm12,%xmm10
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  15,89,192                           // mulps         %xmm0,%xmm0
@@ -49483,8 +49464,8 @@
   .byte  69,15,198,192,0                     // shufps        $0x0,%xmm8,%xmm8
   .byte  65,15,40,194                        // movaps        %xmm10,%xmm0
   .byte  65,15,92,193                        // subps         %xmm9,%xmm0
-  .byte  68,15,87,13,25,216,0,0              // xorps         0xd819(%rip),%xmm9        # 36360 <_sk_srcover_bgra_8888_sse2_lowp+0x1058>
-  .byte  68,15,89,5,241,210,0,0              // mulps         0xd2f1(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,87,13,85,216,0,0              // xorps         0xd855(%rip),%xmm9        # 36360 <_sk_srcover_bgra_8888_sse2_lowp+0x1058>
+  .byte  68,15,89,5,45,211,0,0               // mulps         0xd32d(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  69,15,92,202                        // subps         %xmm10,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
@@ -49505,7 +49486,7 @@
   .byte  243,69,15,89,203                    // mulss         %xmm11,%xmm9
   .byte  69,15,198,201,0                     // shufps        $0x0,%xmm9,%xmm9
   .byte  68,15,88,200                        // addps         %xmm0,%xmm9
-  .byte  68,15,89,13,232,216,0,0             // mulps         0xd8e8(%rip),%xmm9        # 36480 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
+  .byte  68,15,89,13,212,216,0,0             // mulps         0xd8d4(%rip),%xmm9        # 36430 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
   .byte  15,89,192                           // mulps         %xmm0,%xmm0
   .byte  68,15,40,225                        // movaps        %xmm1,%xmm12
   .byte  69,15,89,228                        // mulps         %xmm12,%xmm12
@@ -49513,7 +49494,7 @@
   .byte  243,69,15,89,219                    // mulss         %xmm11,%xmm11
   .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
   .byte  69,15,92,227                        // subps         %xmm11,%xmm12
-  .byte  68,15,89,21,211,216,0,0             // mulps         0xd8d3(%rip),%xmm10        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
+  .byte  68,15,89,21,191,216,0,0             // mulps         0xd8bf(%rip),%xmm10        # 36440 <_sk_srcover_bgra_8888_sse2_lowp+0x1138>
   .byte  69,15,89,212                        // mulps         %xmm12,%xmm10
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  15,89,192                           // mulps         %xmm0,%xmm0
@@ -49522,8 +49503,8 @@
   .byte  69,15,198,192,0                     // shufps        $0x0,%xmm8,%xmm8
   .byte  65,15,40,194                        // movaps        %xmm10,%xmm0
   .byte  65,15,92,193                        // subps         %xmm9,%xmm0
-  .byte  68,15,87,13,123,215,0,0             // xorps         0xd77b(%rip),%xmm9        # 36360 <_sk_srcover_bgra_8888_sse2_lowp+0x1058>
-  .byte  68,15,89,5,83,210,0,0               // mulps         0xd253(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,87,13,183,215,0,0             // xorps         0xd7b7(%rip),%xmm9        # 36360 <_sk_srcover_bgra_8888_sse2_lowp+0x1058>
+  .byte  68,15,89,5,143,210,0,0              // mulps         0xd28f(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  69,15,92,202                        // subps         %xmm10,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
@@ -49541,7 +49522,7 @@
   .byte  243,69,15,89,200                    // mulss         %xmm8,%xmm9
   .byte  69,15,198,201,0                     // shufps        $0x0,%xmm9,%xmm9
   .byte  68,15,88,200                        // addps         %xmm0,%xmm9
-  .byte  68,15,89,13,91,216,0,0              // mulps         0xd85b(%rip),%xmm9        # 36480 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
+  .byte  68,15,89,13,71,216,0,0              // mulps         0xd847(%rip),%xmm9        # 36430 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
   .byte  15,89,192                           // mulps         %xmm0,%xmm0
   .byte  68,15,40,209                        // movaps        %xmm1,%xmm10
   .byte  69,15,89,210                        // mulps         %xmm10,%xmm10
@@ -49549,7 +49530,7 @@
   .byte  243,69,15,89,192                    // mulss         %xmm8,%xmm8
   .byte  69,15,198,192,0                     // shufps        $0x0,%xmm8,%xmm8
   .byte  65,15,92,192                        // subps         %xmm8,%xmm0
-  .byte  15,87,5,23,215,0,0                  // xorps         0xd717(%rip),%xmm0        # 36360 <_sk_srcover_bgra_8888_sse2_lowp+0x1058>
+  .byte  15,87,5,83,215,0,0                  // xorps         0xd753(%rip),%xmm0        # 36360 <_sk_srcover_bgra_8888_sse2_lowp+0x1058>
   .byte  65,15,94,193                        // divps         %xmm9,%xmm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -49595,7 +49576,7 @@
 FUNCTION(_sk_save_xy_sse2)
 _sk_save_xy_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,126,209,0,0              // movaps        0xd17e(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,40,5,186,209,0,0              // movaps        0xd1ba(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  15,17,0                             // movups        %xmm0,(%rax)
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,88,200                        // addps         %xmm8,%xmm9
@@ -49603,7 +49584,7 @@
   .byte  69,15,91,210                        // cvtdq2ps      %xmm10,%xmm10
   .byte  69,15,40,217                        // movaps        %xmm9,%xmm11
   .byte  69,15,194,218,1                     // cmpltps       %xmm10,%xmm11
-  .byte  68,15,40,37,105,209,0,0             // movaps        0xd169(%rip),%xmm12        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  68,15,40,37,165,209,0,0             // movaps        0xd1a5(%rip),%xmm12        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,84,220                        // andps         %xmm12,%xmm11
   .byte  69,15,92,211                        // subps         %xmm11,%xmm10
   .byte  69,15,92,202                        // subps         %xmm10,%xmm9
@@ -49650,8 +49631,8 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,128,128,0,0,0              // movups        0x80(%rax),%xmm8
-  .byte  15,88,5,25,215,0,0                  // addps         0xd719(%rip),%xmm0        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  .byte  68,15,40,13,193,208,0,0             // movaps        0xd0c1(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  15,88,5,5,215,0,0                   // addps         0xd705(%rip),%xmm0        # 36450 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  .byte  68,15,40,13,253,208,0,0             // movaps        0xd0fd(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  68,15,17,136,0,1,0,0                // movups        %xmm9,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -49664,7 +49645,7 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,128,128,0,0,0              // movups        0x80(%rax),%xmm8
-  .byte  15,88,5,141,208,0,0                 // addps         0xd08d(%rip),%xmm0        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  15,88,5,201,208,0,0                 // addps         0xd0c9(%rip),%xmm0        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  68,15,17,128,0,1,0,0                // movups        %xmm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -49676,8 +49657,8 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,64                         // movups        0x40(%rax),%xmm1
   .byte  68,15,16,128,192,0,0,0              // movups        0xc0(%rax),%xmm8
-  .byte  15,88,13,204,214,0,0                // addps         0xd6cc(%rip),%xmm1        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  .byte  68,15,40,13,116,208,0,0             // movaps        0xd074(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  15,88,13,184,214,0,0                // addps         0xd6b8(%rip),%xmm1        # 36450 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  .byte  68,15,40,13,176,208,0,0             // movaps        0xd0b0(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  68,15,17,136,64,1,0,0               // movups        %xmm9,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -49690,7 +49671,7 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,64                         // movups        0x40(%rax),%xmm1
   .byte  68,15,16,128,192,0,0,0              // movups        0xc0(%rax),%xmm8
-  .byte  15,88,13,63,208,0,0                 // addps         0xd03f(%rip),%xmm1        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  15,88,13,123,208,0,0                // addps         0xd07b(%rip),%xmm1        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  68,15,17,128,64,1,0,0               // movups        %xmm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -49702,13 +49683,13 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,128,128,0,0,0              // movups        0x80(%rax),%xmm8
-  .byte  15,88,5,143,214,0,0                 // addps         0xd68f(%rip),%xmm0        # 364b0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
-  .byte  68,15,40,13,39,208,0,0              // movaps        0xd027(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  15,88,5,123,214,0,0                 // addps         0xd67b(%rip),%xmm0        # 36460 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
+  .byte  68,15,40,13,99,208,0,0              // movaps        0xd063(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  69,15,40,193                        // movaps        %xmm9,%xmm8
   .byte  69,15,89,192                        // mulps         %xmm8,%xmm8
-  .byte  68,15,89,13,131,214,0,0             // mulps         0xd683(%rip),%xmm9        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
-  .byte  68,15,88,13,11,210,0,0              // addps         0xd20b(%rip),%xmm9        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
+  .byte  68,15,89,13,111,214,0,0             // mulps         0xd66f(%rip),%xmm9        # 36470 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
+  .byte  68,15,88,13,71,210,0,0              // addps         0xd247(%rip),%xmm9        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  68,15,17,136,0,1,0,0                // movups        %xmm9,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -49721,16 +49702,16 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,128,128,0,0,0              // movups        0x80(%rax),%xmm8
-  .byte  15,88,5,55,214,0,0                  // addps         0xd637(%rip),%xmm0        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  .byte  68,15,40,13,223,207,0,0             // movaps        0xcfdf(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  15,88,5,35,214,0,0                  // addps         0xd623(%rip),%xmm0        # 36450 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  .byte  68,15,40,13,27,208,0,0              // movaps        0xd01b(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
-  .byte  68,15,40,5,83,214,0,0               // movaps        0xd653(%rip),%xmm8        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  .byte  68,15,40,5,63,214,0,0               // movaps        0xd63f(%rip),%xmm8        # 36480 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,87,214,0,0               // addps         0xd657(%rip),%xmm8        # 364e0 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  .byte  68,15,88,5,67,214,0,0               // addps         0xd643(%rip),%xmm8        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,171,207,0,0              // addps         0xcfab(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,88,5,231,207,0,0              // addps         0xcfe7(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,79,214,0,0               // addps         0xd64f(%rip),%xmm8        # 364f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  .byte  68,15,88,5,59,214,0,0               // addps         0xd63b(%rip),%xmm8        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   .byte  68,15,17,128,0,1,0,0                // movups        %xmm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -49740,17 +49721,17 @@
 FUNCTION(_sk_bicubic_p1x_sse2)
 _sk_bicubic_p1x_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,137,207,0,0              // movaps        0xcf89(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,40,5,197,207,0,0              // movaps        0xcfc5(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,136,128,0,0,0              // movups        0x80(%rax),%xmm9
   .byte  65,15,88,192                        // addps         %xmm8,%xmm0
-  .byte  68,15,40,21,2,214,0,0               // movaps        0xd602(%rip),%xmm10        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  .byte  68,15,40,21,238,213,0,0             // movaps        0xd5ee(%rip),%xmm10        # 36480 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,6,214,0,0               // addps         0xd606(%rip),%xmm10        # 364e0 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  .byte  68,15,88,21,242,213,0,0             // addps         0xd5f2(%rip),%xmm10        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
   .byte  69,15,88,208                        // addps         %xmm8,%xmm10
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,2,214,0,0               // addps         0xd602(%rip),%xmm10        # 364f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  .byte  68,15,88,21,238,213,0,0             // addps         0xd5ee(%rip),%xmm10        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   .byte  68,15,17,144,0,1,0,0                // movups        %xmm10,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -49762,11 +49743,11 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,128,128,0,0,0              // movups        0x80(%rax),%xmm8
-  .byte  15,88,5,210,213,0,0                 // addps         0xd5d2(%rip),%xmm0        # 364e0 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  .byte  15,88,5,190,213,0,0                 // addps         0xd5be(%rip),%xmm0        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  69,15,89,201                        // mulps         %xmm9,%xmm9
-  .byte  68,15,89,5,162,213,0,0              // mulps         0xd5a2(%rip),%xmm8        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
-  .byte  68,15,88,5,42,209,0,0               // addps         0xd12a(%rip),%xmm8        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
+  .byte  68,15,89,5,142,213,0,0              // mulps         0xd58e(%rip),%xmm8        # 36470 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
+  .byte  68,15,88,5,102,209,0,0              // addps         0xd166(%rip),%xmm8        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  68,15,17,128,0,1,0,0                // movups        %xmm8,0x100(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -49779,13 +49760,13 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,64                         // movups        0x40(%rax),%xmm1
   .byte  68,15,16,128,192,0,0,0              // movups        0xc0(%rax),%xmm8
-  .byte  15,88,13,101,213,0,0                // addps         0xd565(%rip),%xmm1        # 364b0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
-  .byte  68,15,40,13,253,206,0,0             // movaps        0xcefd(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  15,88,13,81,213,0,0                 // addps         0xd551(%rip),%xmm1        # 36460 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
+  .byte  68,15,40,13,57,207,0,0              // movaps        0xcf39(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  69,15,40,193                        // movaps        %xmm9,%xmm8
   .byte  69,15,89,192                        // mulps         %xmm8,%xmm8
-  .byte  68,15,89,13,89,213,0,0              // mulps         0xd559(%rip),%xmm9        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
-  .byte  68,15,88,13,225,208,0,0             // addps         0xd0e1(%rip),%xmm9        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
+  .byte  68,15,89,13,69,213,0,0              // mulps         0xd545(%rip),%xmm9        # 36470 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
+  .byte  68,15,88,13,29,209,0,0              // addps         0xd11d(%rip),%xmm9        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  68,15,17,136,64,1,0,0               // movups        %xmm9,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -49798,16 +49779,16 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,64                         // movups        0x40(%rax),%xmm1
   .byte  68,15,16,128,192,0,0,0              // movups        0xc0(%rax),%xmm8
-  .byte  15,88,13,12,213,0,0                 // addps         0xd50c(%rip),%xmm1        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  .byte  68,15,40,13,180,206,0,0             // movaps        0xceb4(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  15,88,13,248,212,0,0                // addps         0xd4f8(%rip),%xmm1        # 36450 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  .byte  68,15,40,13,240,206,0,0             // movaps        0xcef0(%rip),%xmm9        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
-  .byte  68,15,40,5,40,213,0,0               // movaps        0xd528(%rip),%xmm8        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  .byte  68,15,40,5,20,213,0,0               // movaps        0xd514(%rip),%xmm8        # 36480 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,44,213,0,0               // addps         0xd52c(%rip),%xmm8        # 364e0 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  .byte  68,15,88,5,24,213,0,0               // addps         0xd518(%rip),%xmm8        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,128,206,0,0              // addps         0xce80(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,88,5,188,206,0,0              // addps         0xcebc(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,36,213,0,0               // addps         0xd524(%rip),%xmm8        # 364f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  .byte  68,15,88,5,16,213,0,0               // addps         0xd510(%rip),%xmm8        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   .byte  68,15,17,128,64,1,0,0               // movups        %xmm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -49817,17 +49798,17 @@
 FUNCTION(_sk_bicubic_p1y_sse2)
 _sk_bicubic_p1y_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,94,206,0,0               // movaps        0xce5e(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
+  .byte  68,15,40,5,154,206,0,0              // movaps        0xce9a(%rip),%xmm8        # 35e40 <_sk_srcover_bgra_8888_sse2_lowp+0xb38>
   .byte  15,16,72,64                         // movups        0x40(%rax),%xmm1
   .byte  68,15,16,136,192,0,0,0              // movups        0xc0(%rax),%xmm9
   .byte  65,15,88,200                        // addps         %xmm8,%xmm1
-  .byte  68,15,40,21,214,212,0,0             // movaps        0xd4d6(%rip),%xmm10        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  .byte  68,15,40,21,194,212,0,0             // movaps        0xd4c2(%rip),%xmm10        # 36480 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,218,212,0,0             // addps         0xd4da(%rip),%xmm10        # 364e0 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  .byte  68,15,88,21,198,212,0,0             // addps         0xd4c6(%rip),%xmm10        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
   .byte  69,15,88,208                        // addps         %xmm8,%xmm10
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,214,212,0,0             // addps         0xd4d6(%rip),%xmm10        # 364f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  .byte  68,15,88,21,194,212,0,0             // addps         0xd4c2(%rip),%xmm10        # 364a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   .byte  68,15,17,144,64,1,0,0               // movups        %xmm10,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -49839,11 +49820,11 @@
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,64                         // movups        0x40(%rax),%xmm1
   .byte  68,15,16,128,192,0,0,0              // movups        0xc0(%rax),%xmm8
-  .byte  15,88,13,165,212,0,0                // addps         0xd4a5(%rip),%xmm1        # 364e0 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  .byte  15,88,13,145,212,0,0                // addps         0xd491(%rip),%xmm1        # 36490 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  69,15,89,201                        // mulps         %xmm9,%xmm9
-  .byte  68,15,89,5,117,212,0,0              // mulps         0xd475(%rip),%xmm8        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
-  .byte  68,15,88,5,253,207,0,0              // addps         0xcffd(%rip),%xmm8        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
+  .byte  68,15,89,5,97,212,0,0               // mulps         0xd461(%rip),%xmm8        # 36470 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
+  .byte  68,15,88,5,57,208,0,0               // addps         0xd039(%rip),%xmm8        # 36050 <_sk_srcover_bgra_8888_sse2_lowp+0xd48>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  68,15,17,128,64,1,0,0               // movups        %xmm8,0x140(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -49988,7 +49969,7 @@
   .byte  102,65,15,254,205                   // paddd         %xmm13,%xmm1
   .byte  102,68,15,127,108,36,208            // movdqa        %xmm13,-0x30(%rsp)
   .byte  102,15,254,217                      // paddd         %xmm1,%xmm3
-  .byte  102,68,15,111,5,188,210,0,0         // movdqa        0xd2bc(%rip),%xmm8        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,68,15,111,5,168,210,0,0         // movdqa        0xd2a8(%rip),%xmm8        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,15,112,195,245                  // pshufd        $0xf5,%xmm3,%xmm0
   .byte  102,65,15,244,216                   // pmuludq       %xmm8,%xmm3
   .byte  102,65,15,244,192                   // pmuludq       %xmm8,%xmm0
@@ -50027,7 +50008,7 @@
   .byte  243,66,15,16,44,139                 // movss         (%rbx,%r9,4),%xmm5
   .byte  15,20,236                           // unpcklps      %xmm4,%xmm5
   .byte  102,15,20,221                       // unpcklpd      %xmm5,%xmm3
-  .byte  102,68,15,111,37,115,203,0,0        // movdqa        0xcb73(%rip),%xmm12        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,68,15,111,37,175,203,0,0        // movdqa        0xcbaf(%rip),%xmm12        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,65,15,254,196                   // paddd         %xmm12,%xmm0
   .byte  102,15,112,224,78                   // pshufd        $0x4e,%xmm0,%xmm4
   .byte  102,73,15,126,224                   // movq          %xmm4,%r8
@@ -50043,7 +50024,7 @@
   .byte  243,66,15,16,36,139                 // movss         (%rbx,%r9,4),%xmm4
   .byte  15,20,224                           // unpcklps      %xmm0,%xmm4
   .byte  102,15,20,236                       // unpcklpd      %xmm4,%xmm5
-  .byte  15,40,37,168,209,0,0                // movaps        0xd1a8(%rip),%xmm4        # 36500 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  .byte  15,40,37,148,209,0,0                // movaps        0xd194(%rip),%xmm4        # 364b0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   .byte  68,15,88,220                        // addps         %xmm4,%xmm11
   .byte  68,15,41,92,36,192                  // movaps        %xmm11,-0x40(%rsp)
   .byte  68,15,40,223                        // movaps        %xmm7,%xmm11
@@ -50079,7 +50060,7 @@
   .byte  15,20,249                           // unpcklps      %xmm1,%xmm7
   .byte  102,15,20,252                       // unpcklpd      %xmm4,%xmm7
   .byte  102,15,111,202                      // movdqa        %xmm2,%xmm1
-  .byte  102,15,250,13,29,209,0,0            // psubd         0xd11d(%rip),%xmm1        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,13,9,209,0,0             // psubd         0xd109(%rip),%xmm1        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,15,112,225,78                   // pshufd        $0x4e,%xmm1,%xmm4
   .byte  102,73,15,126,224                   // movq          %xmm4,%r8
   .byte  102,72,15,126,200                   // movq          %xmm1,%rax
@@ -50169,7 +50150,7 @@
   .byte  243,15,16,44,131                    // movss         (%rbx,%rax,4),%xmm5
   .byte  15,20,233                           // unpcklps      %xmm1,%xmm5
   .byte  102,15,20,221                       // unpcklpd      %xmm5,%xmm3
-  .byte  102,68,15,111,45,229,200,0,0        // movdqa        0xc8e5(%rip),%xmm13        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,68,15,111,45,33,201,0,0         // movdqa        0xc921(%rip),%xmm13        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,65,15,254,197                   // paddd         %xmm13,%xmm0
   .byte  102,15,112,200,78                   // pshufd        $0x4e,%xmm0,%xmm1
   .byte  102,73,15,126,200                   // movq          %xmm1,%r8
@@ -50266,7 +50247,7 @@
   .byte  102,15,111,206                      // movdqa        %xmm6,%xmm1
   .byte  102,65,15,254,202                   // paddd         %xmm10,%xmm1
   .byte  102,15,112,209,245                  // pshufd        $0xf5,%xmm1,%xmm2
-  .byte  102,15,111,29,184,205,0,0           // movdqa        0xcdb8(%rip),%xmm3        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,15,111,29,164,205,0,0           // movdqa        0xcda4(%rip),%xmm3        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,15,244,203                      // pmuludq       %xmm3,%xmm1
   .byte  102,15,244,211                      // pmuludq       %xmm3,%xmm2
   .byte  102,15,111,251                      // movdqa        %xmm3,%xmm7
@@ -50304,7 +50285,7 @@
   .byte  243,15,16,44,131                    // movss         (%rbx,%rax,4),%xmm5
   .byte  15,20,236                           // unpcklps      %xmm4,%xmm5
   .byte  102,15,20,213                       // unpcklpd      %xmm5,%xmm2
-  .byte  102,15,111,5,120,198,0,0            // movdqa        0xc678(%rip),%xmm0        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,111,5,180,198,0,0            // movdqa        0xc6b4(%rip),%xmm0        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,15,254,216                      // paddd         %xmm0,%xmm3
   .byte  102,15,112,227,78                   // pshufd        $0x4e,%xmm3,%xmm4
   .byte  102,73,15,126,224                   // movq          %xmm4,%r8
@@ -50385,7 +50366,7 @@
   .byte  102,15,111,214                      // movdqa        %xmm6,%xmm2
   .byte  102,65,15,254,212                   // paddd         %xmm12,%xmm2
   .byte  102,15,112,194,245                  // pshufd        $0xf5,%xmm2,%xmm0
-  .byte  102,15,111,13,149,203,0,0           // movdqa        0xcb95(%rip),%xmm1        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,15,111,13,129,203,0,0           // movdqa        0xcb81(%rip),%xmm1        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,15,244,209                      // pmuludq       %xmm1,%xmm2
   .byte  102,15,244,193                      // pmuludq       %xmm1,%xmm0
   .byte  102,15,111,241                      // movdqa        %xmm1,%xmm6
@@ -50407,7 +50388,7 @@
   .byte  68,15,20,209                        // unpcklps      %xmm1,%xmm10
   .byte  102,68,15,20,210                    // unpcklpd      %xmm2,%xmm10
   .byte  102,15,111,200                      // movdqa        %xmm0,%xmm1
-  .byte  102,15,250,13,58,203,0,0            // psubd         0xcb3a(%rip),%xmm1        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,13,38,203,0,0            // psubd         0xcb26(%rip),%xmm1        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,15,112,209,78                   // pshufd        $0x4e,%xmm1,%xmm2
   .byte  102,73,15,126,208                   // movq          %xmm2,%r8
   .byte  102,72,15,126,200                   // movq          %xmm1,%rax
@@ -50422,7 +50403,7 @@
   .byte  243,15,16,20,131                    // movss         (%rbx,%rax,4),%xmm2
   .byte  15,20,209                           // unpcklps      %xmm1,%xmm2
   .byte  102,15,20,226                       // unpcklpd      %xmm2,%xmm4
-  .byte  102,15,254,5,85,196,0,0             // paddd         0xc455(%rip),%xmm0        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,254,5,145,196,0,0            // paddd         0xc491(%rip),%xmm0        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,15,112,200,78                   // pshufd        $0x4e,%xmm0,%xmm1
   .byte  102,73,15,126,200                   // movq          %xmm1,%r8
   .byte  102,72,15,126,192                   // movq          %xmm0,%rax
@@ -50459,7 +50440,7 @@
   .byte  15,20,199                           // unpcklps      %xmm7,%xmm0
   .byte  102,15,20,193                       // unpcklpd      %xmm1,%xmm0
   .byte  102,15,111,202                      // movdqa        %xmm2,%xmm1
-  .byte  102,15,250,13,68,202,0,0            // psubd         0xca44(%rip),%xmm1        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,13,48,202,0,0            // psubd         0xca30(%rip),%xmm1        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,15,112,249,78                   // pshufd        $0x4e,%xmm1,%xmm7
   .byte  102,73,15,126,248                   // movq          %xmm7,%r8
   .byte  102,72,15,126,200                   // movq          %xmm1,%rax
@@ -50474,7 +50455,7 @@
   .byte  243,15,16,52,131                    // movss         (%rbx,%rax,4),%xmm6
   .byte  15,20,247                           // unpcklps      %xmm7,%xmm6
   .byte  102,15,20,206                       // unpcklpd      %xmm6,%xmm1
-  .byte  102,15,254,21,95,195,0,0            // paddd         0xc35f(%rip),%xmm2        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,254,21,155,195,0,0           // paddd         0xc39b(%rip),%xmm2        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,15,112,242,78                   // pshufd        $0x4e,%xmm2,%xmm6
   .byte  102,73,15,126,240                   // movq          %xmm6,%r8
   .byte  102,72,15,126,208                   // movq          %xmm2,%rax
@@ -50620,7 +50601,7 @@
   .byte  102,65,15,254,210                   // paddd         %xmm10,%xmm2
   .byte  102,15,111,220                      // movdqa        %xmm4,%xmm3
   .byte  102,15,254,218                      // paddd         %xmm2,%xmm3
-  .byte  102,15,111,37,135,199,0,0           // movdqa        0xc787(%rip),%xmm4        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,15,111,37,115,199,0,0           // movdqa        0xc773(%rip),%xmm4        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,15,112,195,245                  // pshufd        $0xf5,%xmm3,%xmm0
   .byte  102,15,244,220                      // pmuludq       %xmm4,%xmm3
   .byte  102,15,244,196                      // pmuludq       %xmm4,%xmm0
@@ -50659,7 +50640,7 @@
   .byte  243,66,15,16,52,139                 // movss         (%rbx,%r9,4),%xmm6
   .byte  15,20,244                           // unpcklps      %xmm4,%xmm6
   .byte  102,15,20,238                       // unpcklpd      %xmm6,%xmm5
-  .byte  102,15,254,5,65,192,0,0             // paddd         0xc041(%rip),%xmm0        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,254,5,125,192,0,0            // paddd         0xc07d(%rip),%xmm0        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,15,112,224,78                   // pshufd        $0x4e,%xmm0,%xmm4
   .byte  102,73,15,126,224                   // movq          %xmm4,%r8
   .byte  102,73,15,126,193                   // movq          %xmm0,%r9
@@ -50674,7 +50655,7 @@
   .byte  243,66,15,16,52,139                 // movss         (%rbx,%r9,4),%xmm6
   .byte  15,20,244                           // unpcklps      %xmm4,%xmm6
   .byte  102,15,20,222                       // unpcklpd      %xmm6,%xmm3
-  .byte  15,40,53,123,198,0,0                // movaps        0xc67b(%rip),%xmm6        # 36500 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  .byte  15,40,53,103,198,0,0                // movaps        0xc667(%rip),%xmm6        # 364b0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   .byte  68,15,88,198                        // addps         %xmm6,%xmm8
   .byte  68,15,41,68,36,176                  // movaps        %xmm8,-0x50(%rsp)
   .byte  68,15,88,254                        // addps         %xmm6,%xmm15
@@ -50692,7 +50673,7 @@
   .byte  102,65,15,254,215                   // paddd         %xmm15,%xmm2
   .byte  102,68,15,127,124,36,224            // movdqa        %xmm15,-0x20(%rsp)
   .byte  102,15,112,194,245                  // pshufd        $0xf5,%xmm2,%xmm0
-  .byte  102,15,111,13,49,198,0,0            // movdqa        0xc631(%rip),%xmm1        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,15,111,13,29,198,0,0            // movdqa        0xc61d(%rip),%xmm1        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,15,244,209                      // pmuludq       %xmm1,%xmm2
   .byte  102,15,244,193                      // pmuludq       %xmm1,%xmm0
   .byte  102,15,112,192,232                  // pshufd        $0xe8,%xmm0,%xmm0
@@ -50713,7 +50694,7 @@
   .byte  15,20,240                           // unpcklps      %xmm0,%xmm6
   .byte  102,15,20,242                       // unpcklpd      %xmm2,%xmm6
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
-  .byte  102,15,250,5,220,197,0,0            // psubd         0xc5dc(%rip),%xmm0        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,5,200,197,0,0            // psubd         0xc5c8(%rip),%xmm0        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,15,112,208,78                   // pshufd        $0x4e,%xmm0,%xmm2
   .byte  102,73,15,126,208                   // movq          %xmm2,%r8
   .byte  102,72,15,126,192                   // movq          %xmm0,%rax
@@ -50728,7 +50709,7 @@
   .byte  243,68,15,16,28,131                 // movss         (%rbx,%rax,4),%xmm11
   .byte  68,15,20,216                        // unpcklps      %xmm0,%xmm11
   .byte  102,65,15,20,211                    // unpcklpd      %xmm11,%xmm2
-  .byte  102,15,254,13,244,190,0,0           // paddd         0xbef4(%rip),%xmm1        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,254,13,48,191,0,0            // paddd         0xbf30(%rip),%xmm1        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,15,112,193,78                   // pshufd        $0x4e,%xmm1,%xmm0
   .byte  102,73,15,126,192                   // movq          %xmm0,%r8
   .byte  102,72,15,126,200                   // movq          %xmm1,%rax
@@ -50766,7 +50747,7 @@
   .byte  102,65,15,111,194                   // movdqa        %xmm10,%xmm0
   .byte  102,15,254,68,36,160                // paddd         -0x60(%rsp),%xmm0
   .byte  102,15,112,216,245                  // pshufd        $0xf5,%xmm0,%xmm3
-  .byte  102,15,111,37,219,196,0,0           // movdqa        0xc4db(%rip),%xmm4        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,15,111,37,199,196,0,0           // movdqa        0xc4c7(%rip),%xmm4        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,15,244,196                      // pmuludq       %xmm4,%xmm0
   .byte  102,15,244,220                      // pmuludq       %xmm4,%xmm3
   .byte  102,68,15,111,220                   // movdqa        %xmm4,%xmm11
@@ -50804,7 +50785,7 @@
   .byte  243,15,16,60,131                    // movss         (%rbx,%rax,4),%xmm7
   .byte  15,20,253                           // unpcklps      %xmm5,%xmm7
   .byte  102,15,20,199                       // unpcklpd      %xmm7,%xmm0
-  .byte  102,68,15,111,5,151,189,0,0         // movdqa        0xbd97(%rip),%xmm8        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,68,15,111,5,211,189,0,0         // movdqa        0xbdd3(%rip),%xmm8        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,65,15,254,224                   // paddd         %xmm8,%xmm4
   .byte  102,15,112,236,78                   // pshufd        $0x4e,%xmm4,%xmm5
   .byte  102,73,15,126,232                   // movq          %xmm5,%r8
@@ -50950,7 +50931,7 @@
   .byte  243,15,16,36,131                    // movss         (%rbx,%rax,4),%xmm4
   .byte  15,20,226                           // unpcklps      %xmm2,%xmm4
   .byte  102,15,20,252                       // unpcklpd      %xmm4,%xmm7
-  .byte  102,68,15,111,61,241,186,0,0        // movdqa        0xbaf1(%rip),%xmm15        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,68,15,111,61,45,187,0,0         // movdqa        0xbb2d(%rip),%xmm15        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,65,15,254,207                   // paddd         %xmm15,%xmm1
   .byte  102,15,112,209,78                   // pshufd        $0x4e,%xmm1,%xmm2
   .byte  102,73,15,126,208                   // movq          %xmm2,%r8
@@ -51033,7 +51014,7 @@
   .byte  102,65,15,111,217                   // movdqa        %xmm9,%xmm3
   .byte  102,65,15,254,216                   // paddd         %xmm8,%xmm3
   .byte  102,15,112,211,245                  // pshufd        $0xf5,%xmm3,%xmm2
-  .byte  102,15,111,45,16,192,0,0            // movdqa        0xc010(%rip),%xmm5        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,15,111,45,252,191,0,0           // movdqa        0xbffc(%rip),%xmm5        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,15,244,221                      // pmuludq       %xmm5,%xmm3
   .byte  102,15,244,213                      // pmuludq       %xmm5,%xmm2
   .byte  102,15,112,234,232                  // pshufd        $0xe8,%xmm2,%xmm5
@@ -51054,7 +51035,7 @@
   .byte  68,15,20,198                        // unpcklps      %xmm6,%xmm8
   .byte  102,68,15,20,197                    // unpcklpd      %xmm5,%xmm8
   .byte  102,15,111,234                      // movdqa        %xmm2,%xmm5
-  .byte  102,15,250,45,185,191,0,0           // psubd         0xbfb9(%rip),%xmm5        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,45,165,191,0,0           // psubd         0xbfa5(%rip),%xmm5        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,15,112,245,78                   // pshufd        $0x4e,%xmm5,%xmm6
   .byte  102,73,15,126,240                   // movq          %xmm6,%r8
   .byte  102,72,15,126,232                   // movq          %xmm5,%rax
@@ -51086,7 +51067,7 @@
   .byte  102,15,20,214                       // unpcklpd      %xmm6,%xmm2
   .byte  102,69,15,254,205                   // paddd         %xmm13,%xmm9
   .byte  102,65,15,112,233,245               // pshufd        $0xf5,%xmm9,%xmm5
-  .byte  102,15,111,29,23,191,0,0            // movdqa        0xbf17(%rip),%xmm3        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,15,111,29,3,191,0,0             // movdqa        0xbf03(%rip),%xmm3        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,68,15,244,203                   // pmuludq       %xmm3,%xmm9
   .byte  102,15,244,235                      // pmuludq       %xmm3,%xmm5
   .byte  102,15,112,237,232                  // pshufd        $0xe8,%xmm5,%xmm5
@@ -51107,7 +51088,7 @@
   .byte  69,15,20,233                        // unpcklps      %xmm9,%xmm13
   .byte  102,68,15,20,237                    // unpcklpd      %xmm5,%xmm13
   .byte  102,65,15,111,239                   // movdqa        %xmm15,%xmm5
-  .byte  102,15,250,45,183,190,0,0           // psubd         0xbeb7(%rip),%xmm5        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,45,163,190,0,0           // psubd         0xbea3(%rip),%xmm5        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,15,112,221,78                   // pshufd        $0x4e,%xmm5,%xmm3
   .byte  102,73,15,126,216                   // movq          %xmm3,%r8
   .byte  102,72,15,126,232                   // movq          %xmm5,%rax
@@ -51122,7 +51103,7 @@
   .byte  243,15,16,44,131                    // movss         (%rbx,%rax,4),%xmm5
   .byte  15,20,235                           // unpcklps      %xmm3,%xmm5
   .byte  102,15,20,245                       // unpcklpd      %xmm5,%xmm6
-  .byte  102,68,15,111,13,209,183,0,0        // movdqa        0xb7d1(%rip),%xmm9        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,68,15,111,13,13,184,0,0         // movdqa        0xb80d(%rip),%xmm9        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,69,15,254,249                   // paddd         %xmm9,%xmm15
   .byte  102,65,15,112,223,78                // pshufd        $0x4e,%xmm15,%xmm3
   .byte  102,73,15,126,216                   // movq          %xmm3,%r8
@@ -51184,7 +51165,7 @@
   .byte  102,65,15,111,195                   // movdqa        %xmm11,%xmm0
   .byte  102,15,254,195                      // paddd         %xmm3,%xmm0
   .byte  102,15,112,200,245                  // pshufd        $0xf5,%xmm0,%xmm1
-  .byte  102,15,111,21,82,189,0,0            // movdqa        0xbd52(%rip),%xmm2        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,15,111,21,62,189,0,0            // movdqa        0xbd3e(%rip),%xmm2        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,15,244,194                      // pmuludq       %xmm2,%xmm0
   .byte  102,15,244,202                      // pmuludq       %xmm2,%xmm1
   .byte  102,68,15,111,234                   // movdqa        %xmm2,%xmm13
@@ -51377,7 +51358,7 @@
   .byte  68,15,20,238                        // unpcklps      %xmm6,%xmm13
   .byte  102,68,15,20,234                    // unpcklpd      %xmm2,%xmm13
   .byte  102,15,111,212                      // movdqa        %xmm4,%xmm2
-  .byte  102,15,250,21,238,185,0,0           // psubd         0xb9ee(%rip),%xmm2        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,21,218,185,0,0           // psubd         0xb9da(%rip),%xmm2        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,15,112,242,78                   // pshufd        $0x4e,%xmm2,%xmm6
   .byte  102,73,15,126,240                   // movq          %xmm6,%r8
   .byte  102,72,15,126,208                   // movq          %xmm2,%rax
@@ -51437,7 +51418,7 @@
   .byte  102,68,15,111,116,36,160            // movdqa        -0x60(%rsp),%xmm14
   .byte  102,65,15,254,198                   // paddd         %xmm14,%xmm0
   .byte  102,15,112,200,245                  // pshufd        $0xf5,%xmm0,%xmm1
-  .byte  102,68,15,111,5,205,184,0,0         // movdqa        0xb8cd(%rip),%xmm8        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,68,15,111,5,185,184,0,0         // movdqa        0xb8b9(%rip),%xmm8        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,65,15,244,192                   // pmuludq       %xmm8,%xmm0
   .byte  102,65,15,244,200                   // pmuludq       %xmm8,%xmm1
   .byte  102,65,15,111,240                   // movdqa        %xmm8,%xmm6
@@ -51475,7 +51456,7 @@
   .byte  243,15,16,44,131                    // movss         (%rbx,%rax,4),%xmm5
   .byte  15,20,235                           // unpcklps      %xmm3,%xmm5
   .byte  102,15,20,205                       // unpcklpd      %xmm5,%xmm1
-  .byte  102,68,15,111,5,139,177,0,0         // movdqa        0xb18b(%rip),%xmm8        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,68,15,111,5,199,177,0,0         // movdqa        0xb1c7(%rip),%xmm8        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,65,15,254,208                   // paddd         %xmm8,%xmm2
   .byte  102,15,112,218,78                   // pshufd        $0x4e,%xmm2,%xmm3
   .byte  102,73,15,126,216                   // movq          %xmm3,%r8
@@ -51558,7 +51539,7 @@
   .byte  102,15,254,208                      // paddd         %xmm0,%xmm2
   .byte  102,15,111,240                      // movdqa        %xmm0,%xmm6
   .byte  102,15,112,194,245                  // pshufd        $0xf5,%xmm2,%xmm0
-  .byte  102,15,111,37,167,182,0,0           // movdqa        0xb6a7(%rip),%xmm4        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  102,15,111,37,147,182,0,0           // movdqa        0xb693(%rip),%xmm4        # 364c0 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
   .byte  102,15,244,212                      // pmuludq       %xmm4,%xmm2
   .byte  102,15,244,196                      // pmuludq       %xmm4,%xmm0
   .byte  102,15,112,200,232                  // pshufd        $0xe8,%xmm0,%xmm1
@@ -51579,7 +51560,7 @@
   .byte  68,15,20,193                        // unpcklps      %xmm1,%xmm8
   .byte  102,68,15,20,194                    // unpcklpd      %xmm2,%xmm8
   .byte  102,15,111,200                      // movdqa        %xmm0,%xmm1
-  .byte  102,15,250,13,80,182,0,0            // psubd         0xb650(%rip),%xmm1        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,13,60,182,0,0            // psubd         0xb63c(%rip),%xmm1        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,15,112,209,78                   // pshufd        $0x4e,%xmm1,%xmm2
   .byte  102,73,15,126,208                   // movq          %xmm2,%r8
   .byte  102,72,15,126,200                   // movq          %xmm1,%rax
@@ -51594,7 +51575,7 @@
   .byte  243,15,16,20,131                    // movss         (%rbx,%rax,4),%xmm2
   .byte  15,20,209                           // unpcklps      %xmm1,%xmm2
   .byte  102,68,15,20,242                    // unpcklpd      %xmm2,%xmm14
-  .byte  102,15,254,5,105,175,0,0            // paddd         0xaf69(%rip),%xmm0        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,254,5,165,175,0,0            // paddd         0xafa5(%rip),%xmm0        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,15,112,200,78                   // pshufd        $0x4e,%xmm0,%xmm1
   .byte  102,73,15,126,200                   // movq          %xmm1,%r8
   .byte  102,72,15,126,192                   // movq          %xmm0,%rax
@@ -51632,7 +51613,7 @@
   .byte  15,20,198                           // unpcklps      %xmm6,%xmm0
   .byte  102,15,20,193                       // unpcklpd      %xmm1,%xmm0
   .byte  102,15,111,202                      // movdqa        %xmm2,%xmm1
-  .byte  102,15,250,13,88,181,0,0            // psubd         0xb558(%rip),%xmm1        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  102,15,250,13,68,181,0,0            // psubd         0xb544(%rip),%xmm1        # 364d0 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
   .byte  102,15,112,241,78                   // pshufd        $0x4e,%xmm1,%xmm6
   .byte  102,73,15,126,240                   // movq          %xmm6,%r8
   .byte  102,72,15,126,200                   // movq          %xmm1,%rax
@@ -51647,7 +51628,7 @@
   .byte  243,15,16,36,131                    // movss         (%rbx,%rax,4),%xmm4
   .byte  15,20,230                           // unpcklps      %xmm6,%xmm4
   .byte  102,15,20,204                       // unpcklpd      %xmm4,%xmm1
-  .byte  102,15,254,21,115,174,0,0           // paddd         0xae73(%rip),%xmm2        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
+  .byte  102,15,254,21,175,174,0,0           // paddd         0xaeaf(%rip),%xmm2        # 35e80 <_sk_srcover_bgra_8888_sse2_lowp+0xb78>
   .byte  102,15,112,226,78                   // pshufd        $0x4e,%xmm2,%xmm4
   .byte  102,73,15,126,224                   // movq          %xmm4,%r8
   .byte  102,72,15,126,208                   // movq          %xmm2,%rax
@@ -51706,7 +51687,7 @@
   .byte  15,89,212                           // mulps         %xmm4,%xmm2
   .byte  65,15,88,215                        // addps         %xmm15,%xmm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,95,173,0,0                 // movaps        0xad5f(%rip),%xmm3        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
+  .byte  15,40,29,155,173,0,0                // movaps        0xad9b(%rip),%xmm3        # 35e50 <_sk_srcover_bgra_8888_sse2_lowp+0xb48>
   .byte  15,40,100,36,96                     // movaps        0x60(%rsp),%xmm4
   .byte  15,40,108,36,112                    // movaps        0x70(%rsp),%xmm5
   .byte  15,40,180,36,128,0,0,0              // movaps        0x80(%rsp),%xmm6
@@ -51714,6 +51695,25 @@
   .byte  72,129,196,160,0,0,0                // add           $0xa0,%rsp
   .byte  91                                  // pop           %rbx
   .byte  255,224                             // jmpq          *%rax
+
+HIDDEN _sk_gauss_a_to_rgba_sse2
+.globl _sk_gauss_a_to_rgba_sse2
+FUNCTION(_sk_gauss_a_to_rgba_sse2)
+_sk_gauss_a_to_rgba_sse2:
+  .byte  15,40,5,0,180,0,0                   // movaps        0xb400(%rip),%xmm0        # 364e0 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  .byte  15,89,195                           // mulps         %xmm3,%xmm0
+  .byte  15,88,5,6,180,0,0                   // addps         0xb406(%rip),%xmm0        # 364f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  .byte  15,89,195                           // mulps         %xmm3,%xmm0
+  .byte  15,88,5,12,180,0,0                  // addps         0xb40c(%rip),%xmm0        # 36500 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  .byte  15,89,195                           // mulps         %xmm3,%xmm0
+  .byte  15,88,5,18,180,0,0                  // addps         0xb412(%rip),%xmm0        # 36510 <_sk_srcover_bgra_8888_sse2_lowp+0x1208>
+  .byte  15,89,195                           // mulps         %xmm3,%xmm0
+  .byte  15,88,5,24,180,0,0                  // addps         0xb418(%rip),%xmm0        # 36520 <_sk_srcover_bgra_8888_sse2_lowp+0x1218>
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  15,40,200                           // movaps        %xmm0,%xmm1
+  .byte  15,40,208                           // movaps        %xmm0,%xmm2
+  .byte  15,40,216                           // movaps        %xmm0,%xmm3
+  .byte  255,224                             // jmpq          *%rax
   .byte  144                                 // nop
   .byte  144                                 // nop
   .byte  144                                 // nop
@@ -62778,16 +62778,9 @@
   .byte  208                                 // (bad)
   .byte  179,89                              // mov           $0x59,%bl
   .byte  62,152                              // ds            cwtl
-  .byte  221,147,61,45,16,17                 // fstl          0x11102d3d(%rbx)
-  .byte  192,18,120                          // rclb          $0x78,(%rdx)
-  .byte  57,64,32                            // cmp           %eax,0x20(%rax)
-  .byte  148                                 // xchg          %eax,%esp
-  .byte  90                                  // pop           %rdx
-  .byte  62,4,157                            // ds            add $0x9d,%al
-  .byte  30                                  // (bad)
-  .byte  62,0,24                             // add           %bl,%ds:(%rax)
-  .byte  161,57,111,43,231,187,159,215,202   // movabs        0xcad79fbbe72b6f39,%eax
-  .byte  60,212                              // cmp           $0xd4,%al
+  .byte  221,147,61,111,43,231               // fstl          -0x18d490c3(%rbx)
+  .byte  187,159,215,202,60                  // mov           $0x3ccad79f,%ebx
+  .byte  212                                 // (bad)
   .byte  100,84                              // fs            push %rsp
   .byte  189,169,240,34,62                   // mov           $0x3e22f0a9,%ebp
   .byte  0,0                                 // add           %al,(%rax)
@@ -62798,26 +62791,25 @@
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,0,0                             // addb          $0x0,(%rax)
   .byte  0,191,0,0,192,191                   // add           %bh,-0x40400000(%rdi)
-  .byte  114,28                              // jb            3573e <.literal4+0x18a>
+  .byte  114,28                              // jb            3572a <.literal4+0x176>
   .byte  199                                 // (bad)
   .byte  62,85                               // ds            push %rbp
   .byte  85                                  // push          %rbp
   .byte  149                                 // xchg          %eax,%ebp
   .byte  191,0,0,192,63                      // mov           $0x3fc00000,%edi
   .byte  57,142,99,61,114,249                // cmp           %ecx,-0x68dc29d(%rsi)
-  .byte  127,63                              // jg            35773 <_sk_srcover_bgra_8888_sse2_lowp+0x46b>
+  .byte  127,63                              // jg            3575f <_sk_srcover_bgra_8888_sse2_lowp+0x457>
   .byte  3,0                                 // add           (%rax),%eax
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  109                                 // insl          (%dx),%es:(%rdi)
-  .byte  165                                 // movsl         %ds:(%rsi),%es:(%rdi)
-  .byte  144                                 // nop
-  .byte  63                                  // (bad)
-  .byte  252                                 // cld
-  .byte  191,16,62,0,4                       // mov           $0x4003e10,%edi
+  .byte  45,16,17,192,18                     // sub           $0x12c01110,%eax
+  .byte  120,57                              // js            35764 <_sk_srcover_bgra_8888_sse2_lowp+0x45c>
+  .byte  64,32,148,90,62,4,157,30            // and           %dl,0x1e9d043e(%rdx,%rbx,2)
+  .byte  62,0,24                             // add           %bl,%ds:(%rax)
+  .byte  161,57,109,165,144,63,252,191,16    // movabs        0x10bffc3f90a56d39,%eax
+  .byte  62,0,4,0                            // add           %al,%ds:(%rax,%rax,1)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,56                                // add           %bh,(%rax)
-  .byte  255                                 // (bad)
+  .byte  56,255                              // cmp           %bh,%bh
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  127,0                               // jg            3574d <.literal4+0x199>
@@ -64364,28 +64356,7 @@
   .byte  221,147,61,152,221,147              // fstl          -0x6c2267c3(%rbx)
   .byte  61,152,221,147,61                   // cmp           $0x3d93dd98,%eax
   .byte  152                                 // cwtl
-  .byte  221,147,61,45,16,17                 // fstl          0x11102d3d(%rbx)
-  .byte  192,45,16,17,192,45,16              // shrb          $0x10,0x2dc01110(%rip)        # 2dc374fa <_sk_srcover_bgra_8888_sse2_lowp+0x2dc021f2>
-  .byte  17,192                              // adc           %eax,%eax
-  .byte  45,16,17,192,18                     // sub           $0x12c01110,%eax
-  .byte  120,57                              // js            3642c <.literal16+0x63c>
-  .byte  64,18,120,57                        // adc           0x39(%rax),%dil
-  .byte  64,18,120,57                        // adc           0x39(%rax),%dil
-  .byte  64,18,120,57                        // adc           0x39(%rax),%dil
-  .byte  64,32,148,90,62,32,148,90           // and           %dl,0x5a94203e(%rdx,%rbx,2)
-  .byte  62,32,148,90,62,32,148,90           // and           %dl,%ds:0x5a94203e(%rdx,%rbx,2)
-  .byte  62,4,157                            // ds            add $0x9d,%al
-  .byte  30                                  // (bad)
-  .byte  62,4,157                            // ds            add $0x9d,%al
-  .byte  30                                  // (bad)
-  .byte  62,4,157                            // ds            add $0x9d,%al
-  .byte  30                                  // (bad)
-  .byte  62,4,157                            // ds            add $0x9d,%al
-  .byte  30                                  // (bad)
-  .byte  62,0,24                             // add           %bl,%ds:(%rax)
-  .byte  161,57,0,24,161,57,0,24,161         // movabs        0xa1180039a1180039,%eax
-  .byte  57,0                                // cmp           %eax,(%rax)
-  .byte  24,161,57,111,43,231                // sbb           %ah,-0x18d490c7(%rcx)
+  .byte  221,147,61,111,43,231               // fstl          -0x18d490c3(%rbx)
   .byte  187,111,43,231,187                  // mov           $0xbbe72b6f,%ebx
   .byte  111                                 // outsl         %ds:(%rsi),(%dx)
   .byte  43,231                              // sub           %edi,%esp
@@ -64433,13 +64404,13 @@
   .byte  192,191,0,0,192,191,0               // sarb          $0x0,-0x40400000(%rdi)
   .byte  0,192                               // add           %al,%al
   .byte  191,0,0,192,191                     // mov           $0xbfc00000,%edi
-  .byte  114,28                              // jb            364de <.literal16+0x6ee>
+  .byte  114,28                              // jb            3648e <.literal16+0x69e>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         364e2 <.literal16+0x6f2>
+  .byte  62,114,28                           // jb,pt         36492 <.literal16+0x6a2>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         364e6 <.literal16+0x6f6>
+  .byte  62,114,28                           // jb,pt         36496 <.literal16+0x6a6>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         364ea <.literal16+0x6fa>
+  .byte  62,114,28                           // jb,pt         3649a <.literal16+0x6aa>
   .byte  199                                 // (bad)
   .byte  62,85                               // ds            push %rbp
   .byte  85                                  // push          %rbp
@@ -64458,15 +64429,15 @@
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d66f335 <_sk_srcover_bgra_8888_sse2_lowp+0x3d63a02d>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d66f2e5 <_sk_srcover_bgra_8888_sse2_lowp+0x3d639fdd>
   .byte  57,142,99,61,114,249                // cmp           %ecx,-0x68dc29d(%rsi)
-  .byte  127,63                              // jg            36543 <.literal16+0x753>
-  .byte  114,249                             // jb            364ff <.literal16+0x70f>
-  .byte  127,63                              // jg            36547 <.literal16+0x757>
-  .byte  114,249                             // jb            36503 <.literal16+0x713>
-  .byte  127,63                              // jg            3654b <.literal16+0x75b>
-  .byte  114,249                             // jb            36507 <.literal16+0x717>
-  .byte  127,63                              // jg            3654f <.literal16+0x75f>
+  .byte  127,63                              // jg            364f3 <.literal16+0x703>
+  .byte  114,249                             // jb            364af <.literal16+0x6bf>
+  .byte  127,63                              // jg            364f7 <.literal16+0x707>
+  .byte  114,249                             // jb            364b3 <.literal16+0x6c3>
+  .byte  127,63                              // jg            364fb <.literal16+0x70b>
+  .byte  114,249                             // jb            364b7 <.literal16+0x6c7>
+  .byte  127,63                              // jg            364ff <.literal16+0x70f>
   .byte  3,0                                 // add           (%rax),%eax
   .byte  0,0                                 // add           %al,(%rax)
   .byte  3,0                                 // add           (%rax),%eax
@@ -64490,8 +64461,28 @@
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,0                               // incl          (%rax)
-  .byte  0,255                               // add           %bh,%bh
+  .byte  255,45,16,17,192,45                 // ljmp          *0x2dc01110(%rip)        # 2dc375f5 <_sk_srcover_bgra_8888_sse2_lowp+0x2dc022ed>
+  .byte  16,17                               // adc           %dl,(%rcx)
+  .byte  192,45,16,17,192,45,16              // shrb          $0x10,0x2dc01110(%rip)        # 2dc375fe <_sk_srcover_bgra_8888_sse2_lowp+0x2dc022f6>
+  .byte  17,192                              // adc           %eax,%eax
+  .byte  18,120,57                           // adc           0x39(%rax),%bh
+  .byte  64,18,120,57                        // adc           0x39(%rax),%dil
+  .byte  64,18,120,57                        // adc           0x39(%rax),%dil
+  .byte  64,18,120,57                        // adc           0x39(%rax),%dil
+  .byte  64,32,148,90,62,32,148,90           // and           %dl,0x5a94203e(%rdx,%rbx,2)
+  .byte  62,32,148,90,62,32,148,90           // and           %dl,%ds:0x5a94203e(%rdx,%rbx,2)
+  .byte  62,4,157                            // ds            add $0x9d,%al
+  .byte  30                                  // (bad)
+  .byte  62,4,157                            // ds            add $0x9d,%al
+  .byte  30                                  // (bad)
+  .byte  62,4,157                            // ds            add $0x9d,%al
+  .byte  30                                  // (bad)
+  .byte  62,4,157                            // ds            add $0x9d,%al
+  .byte  30                                  // (bad)
+  .byte  62,0,24                             // add           %bl,%ds:(%rax)
+  .byte  161,57,0,24,161,57,0,24,161         // movabs        0xa1180039a1180039,%eax
+  .byte  57,0                                // cmp           %eax,(%rax)
+  .byte  24,161,57,0,0,255                   // sbb           %ah,-0xffffc7(%rcx)
   .byte  255,0                               // incl          (%rax)
   .byte  0,255                               // add           %bh,%bh
   .byte  255,0                               // incl          (%rax)
@@ -73210,37 +73201,6 @@
   .byte  93                                  // pop           %ebp
   .byte  195                                 // ret
 
-HIDDEN _sk_gauss_a_to_rgba_sse2
-.globl _sk_gauss_a_to_rgba_sse2
-FUNCTION(_sk_gauss_a_to_rgba_sse2)
-_sk_gauss_a_to_rgba_sse2:
-  .byte  85                                  // push          %ebp
-  .byte  137,229                             // mov           %esp,%ebp
-  .byte  131,236,8                           // sub           $0x8,%esp
-  .byte  232,0,0,0,0                         // call          697e <_sk_gauss_a_to_rgba_sse2+0xb>
-  .byte  88                                  // pop           %eax
-  .byte  139,77,12                           // mov           0xc(%ebp),%ecx
-  .byte  15,40,128,226,121,0,0               // movaps        0x79e2(%eax),%xmm0
-  .byte  15,89,195                           // mulps         %xmm3,%xmm0
-  .byte  15,88,128,242,121,0,0               // addps         0x79f2(%eax),%xmm0
-  .byte  15,89,195                           // mulps         %xmm3,%xmm0
-  .byte  15,88,128,2,122,0,0                 // addps         0x7a02(%eax),%xmm0
-  .byte  15,89,195                           // mulps         %xmm3,%xmm0
-  .byte  15,88,128,18,122,0,0                // addps         0x7a12(%eax),%xmm0
-  .byte  15,89,195                           // mulps         %xmm3,%xmm0
-  .byte  15,88,128,34,122,0,0                // addps         0x7a22(%eax),%xmm0
-  .byte  141,65,4                            // lea           0x4(%ecx),%eax
-  .byte  131,236,8                           // sub           $0x8,%esp
-  .byte  15,40,200                           // movaps        %xmm0,%xmm1
-  .byte  15,40,208                           // movaps        %xmm0,%xmm2
-  .byte  15,40,216                           // movaps        %xmm0,%xmm3
-  .byte  80                                  // push          %eax
-  .byte  255,117,8                           // pushl         0x8(%ebp)
-  .byte  255,17                              // call          *(%ecx)
-  .byte  131,196,24                          // add           $0x18,%esp
-  .byte  93                                  // pop           %ebp
-  .byte  195                                 // ret
-
 HIDDEN _sk_gradient_sse2
 .globl _sk_gradient_sse2
 FUNCTION(_sk_gradient_sse2)
@@ -73256,7 +73216,7 @@
   .byte  139,10                              // mov           (%edx),%ecx
   .byte  102,15,239,201                      // pxor          %xmm1,%xmm1
   .byte  131,249,2                           // cmp           $0x2,%ecx
-  .byte  114,33                              // jb            6a05 <_sk_gradient_sse2+0x3a>
+  .byte  114,33                              // jb            69ad <_sk_gradient_sse2+0x3a>
   .byte  139,114,36                          // mov           0x24(%edx),%esi
   .byte  73                                  // dec           %ecx
   .byte  131,198,4                           // add           $0x4,%esi
@@ -73267,7 +73227,7 @@
   .byte  102,15,250,202                      // psubd         %xmm2,%xmm1
   .byte  131,198,4                           // add           $0x4,%esi
   .byte  73                                  // dec           %ecx
-  .byte  117,234                             // jne           69ef <_sk_gradient_sse2+0x24>
+  .byte  117,234                             // jne           6997 <_sk_gradient_sse2+0x24>
   .byte  102,15,126,206                      // movd          %xmm1,%esi
   .byte  102,15,112,209,229                  // pshufd        $0xe5,%xmm1,%xmm2
   .byte  102,15,112,217,78                   // pshufd        $0x4e,%xmm1,%xmm3
@@ -73420,7 +73380,7 @@
   .byte  15,40,218                           // movaps        %xmm2,%xmm3
   .byte  15,40,209                           // movaps        %xmm1,%xmm2
   .byte  15,40,200                           // movaps        %xmm0,%xmm1
-  .byte  232,0,0,0,0                         // call          6c0d <_sk_xy_to_unit_angle_sse2+0x18>
+  .byte  232,0,0,0,0                         // call          6bb5 <_sk_xy_to_unit_angle_sse2+0x18>
   .byte  88                                  // pop           %eax
   .byte  15,87,237                           // xorps         %xmm5,%xmm5
   .byte  15,92,233                           // subps         %xmm1,%xmm5
@@ -73435,30 +73395,30 @@
   .byte  15,94,247                           // divps         %xmm7,%xmm6
   .byte  15,40,254                           // movaps        %xmm6,%xmm7
   .byte  15,89,255                           // mulps         %xmm7,%xmm7
-  .byte  15,40,128,163,119,0,0               // movaps        0x77a3(%eax),%xmm0
+  .byte  15,40,128,171,119,0,0               // movaps        0x77ab(%eax),%xmm0
   .byte  15,89,199                           // mulps         %xmm7,%xmm0
-  .byte  15,88,128,179,119,0,0               // addps         0x77b3(%eax),%xmm0
+  .byte  15,88,128,187,119,0,0               // addps         0x77bb(%eax),%xmm0
   .byte  15,89,199                           // mulps         %xmm7,%xmm0
-  .byte  15,88,128,195,119,0,0               // addps         0x77c3(%eax),%xmm0
+  .byte  15,88,128,203,119,0,0               // addps         0x77cb(%eax),%xmm0
   .byte  15,89,199                           // mulps         %xmm7,%xmm0
-  .byte  15,88,128,211,119,0,0               // addps         0x77d3(%eax),%xmm0
+  .byte  15,88,128,219,119,0,0               // addps         0x77db(%eax),%xmm0
   .byte  15,89,198                           // mulps         %xmm6,%xmm0
   .byte  15,194,236,1                        // cmpltps       %xmm4,%xmm5
-  .byte  15,40,176,227,119,0,0               // movaps        0x77e3(%eax),%xmm6
+  .byte  15,40,176,235,119,0,0               // movaps        0x77eb(%eax),%xmm6
   .byte  15,92,240                           // subps         %xmm0,%xmm6
   .byte  15,84,245                           // andps         %xmm5,%xmm6
   .byte  15,85,232                           // andnps        %xmm0,%xmm5
   .byte  15,87,228                           // xorps         %xmm4,%xmm4
   .byte  15,86,238                           // orps          %xmm6,%xmm5
   .byte  15,194,204,1                        // cmpltps       %xmm4,%xmm1
-  .byte  15,40,128,163,113,0,0               // movaps        0x71a3(%eax),%xmm0
+  .byte  15,40,128,251,113,0,0               // movaps        0x71fb(%eax),%xmm0
   .byte  15,92,197                           // subps         %xmm5,%xmm0
   .byte  15,84,193                           // andps         %xmm1,%xmm0
   .byte  15,85,205                           // andnps        %xmm5,%xmm1
   .byte  15,86,200                           // orps          %xmm0,%xmm1
   .byte  15,40,194                           // movaps        %xmm2,%xmm0
   .byte  15,194,196,1                        // cmpltps       %xmm4,%xmm0
-  .byte  15,40,168,179,113,0,0               // movaps        0x71b3(%eax),%xmm5
+  .byte  15,40,168,11,114,0,0                // movaps        0x720b(%eax),%xmm5
   .byte  15,92,233                           // subps         %xmm1,%xmm5
   .byte  15,84,232                           // andps         %xmm0,%xmm5
   .byte  15,85,193                           // andnps        %xmm1,%xmm0
@@ -73519,15 +73479,15 @@
   .byte  15,40,241                           // movaps        %xmm1,%xmm6
   .byte  15,89,246                           // mulps         %xmm6,%xmm6
   .byte  15,88,240                           // addps         %xmm0,%xmm6
-  .byte  232,0,0,0,0                         // call          6d2a <_sk_xy_to_2pt_conical_quadratic_max_sse2+0x31>
+  .byte  232,0,0,0,0                         // call          6cd2 <_sk_xy_to_2pt_conical_quadratic_max_sse2+0x31>
   .byte  90                                  // pop           %edx
   .byte  243,15,89,237                       // mulss         %xmm5,%xmm5
   .byte  15,198,237,0                        // shufps        $0x0,%xmm5,%xmm5
   .byte  15,92,245                           // subps         %xmm5,%xmm6
   .byte  243,15,16,65,64                     // movss         0x40(%ecx),%xmm0
   .byte  15,198,192,0                        // shufps        $0x0,%xmm0,%xmm0
-  .byte  15,89,162,214,118,0,0               // mulps         0x76d6(%edx),%xmm4
-  .byte  15,89,130,230,118,0,0               // mulps         0x76e6(%edx),%xmm0
+  .byte  15,89,162,222,118,0,0               // mulps         0x76de(%edx),%xmm4
+  .byte  15,89,130,238,118,0,0               // mulps         0x76ee(%edx),%xmm0
   .byte  15,89,198                           // mulps         %xmm6,%xmm0
   .byte  15,40,236                           // movaps        %xmm4,%xmm5
   .byte  15,89,237                           // mulps         %xmm5,%xmm5
@@ -73535,11 +73495,11 @@
   .byte  15,81,237                           // sqrtps        %xmm5,%xmm5
   .byte  15,40,197                           // movaps        %xmm5,%xmm0
   .byte  15,92,196                           // subps         %xmm4,%xmm0
-  .byte  15,87,162,134,117,0,0               // xorps         0x7586(%edx),%xmm4
+  .byte  15,87,162,222,117,0,0               // xorps         0x75de(%edx),%xmm4
   .byte  15,92,229                           // subps         %xmm5,%xmm4
   .byte  243,15,16,105,68                    // movss         0x44(%ecx),%xmm5
   .byte  15,198,237,0                        // shufps        $0x0,%xmm5,%xmm5
-  .byte  15,89,170,134,112,0,0               // mulps         0x7086(%edx),%xmm5
+  .byte  15,89,170,222,112,0,0               // mulps         0x70de(%edx),%xmm5
   .byte  15,89,197                           // mulps         %xmm5,%xmm0
   .byte  15,89,229                           // mulps         %xmm5,%xmm4
   .byte  15,95,196                           // maxps         %xmm4,%xmm0
@@ -73570,15 +73530,15 @@
   .byte  15,40,241                           // movaps        %xmm1,%xmm6
   .byte  15,89,246                           // mulps         %xmm6,%xmm6
   .byte  15,88,240                           // addps         %xmm0,%xmm6
-  .byte  232,0,0,0,0                         // call          6dc8 <_sk_xy_to_2pt_conical_quadratic_min_sse2+0x31>
+  .byte  232,0,0,0,0                         // call          6d70 <_sk_xy_to_2pt_conical_quadratic_min_sse2+0x31>
   .byte  90                                  // pop           %edx
   .byte  243,15,89,237                       // mulss         %xmm5,%xmm5
   .byte  15,198,237,0                        // shufps        $0x0,%xmm5,%xmm5
   .byte  15,92,245                           // subps         %xmm5,%xmm6
   .byte  243,15,16,65,64                     // movss         0x40(%ecx),%xmm0
   .byte  15,198,192,0                        // shufps        $0x0,%xmm0,%xmm0
-  .byte  15,89,162,56,118,0,0                // mulps         0x7638(%edx),%xmm4
-  .byte  15,89,130,72,118,0,0                // mulps         0x7648(%edx),%xmm0
+  .byte  15,89,162,64,118,0,0                // mulps         0x7640(%edx),%xmm4
+  .byte  15,89,130,80,118,0,0                // mulps         0x7650(%edx),%xmm0
   .byte  15,89,198                           // mulps         %xmm6,%xmm0
   .byte  15,40,236                           // movaps        %xmm4,%xmm5
   .byte  15,89,237                           // mulps         %xmm5,%xmm5
@@ -73586,11 +73546,11 @@
   .byte  15,81,237                           // sqrtps        %xmm5,%xmm5
   .byte  15,40,197                           // movaps        %xmm5,%xmm0
   .byte  15,92,196                           // subps         %xmm4,%xmm0
-  .byte  15,87,162,232,116,0,0               // xorps         0x74e8(%edx),%xmm4
+  .byte  15,87,162,64,117,0,0                // xorps         0x7540(%edx),%xmm4
   .byte  15,92,229                           // subps         %xmm5,%xmm4
   .byte  243,15,16,105,68                    // movss         0x44(%ecx),%xmm5
   .byte  15,198,237,0                        // shufps        $0x0,%xmm5,%xmm5
-  .byte  15,89,170,232,111,0,0               // mulps         0x6fe8(%edx),%xmm5
+  .byte  15,89,170,64,112,0,0                // mulps         0x7040(%edx),%xmm5
   .byte  15,89,197                           // mulps         %xmm5,%xmm0
   .byte  15,89,229                           // mulps         %xmm5,%xmm4
   .byte  15,93,196                           // minps         %xmm4,%xmm0
@@ -73610,7 +73570,7 @@
   .byte  85                                  // push          %ebp
   .byte  137,229                             // mov           %esp,%ebp
   .byte  131,236,8                           // sub           $0x8,%esp
-  .byte  232,0,0,0,0                         // call          6e40 <_sk_xy_to_2pt_conical_linear_sse2+0xb>
+  .byte  232,0,0,0,0                         // call          6de8 <_sk_xy_to_2pt_conical_linear_sse2+0xb>
   .byte  89                                  // pop           %ecx
   .byte  139,69,12                           // mov           0xc(%ebp),%eax
   .byte  139,16                              // mov           (%eax),%edx
@@ -73619,7 +73579,7 @@
   .byte  243,15,89,236                       // mulss         %xmm4,%xmm5
   .byte  15,198,237,0                        // shufps        $0x0,%xmm5,%xmm5
   .byte  15,88,232                           // addps         %xmm0,%xmm5
-  .byte  15,89,169,192,117,0,0               // mulps         0x75c0(%ecx),%xmm5
+  .byte  15,89,169,200,117,0,0               // mulps         0x75c8(%ecx),%xmm5
   .byte  15,89,192                           // mulps         %xmm0,%xmm0
   .byte  15,40,241                           // movaps        %xmm1,%xmm6
   .byte  15,89,246                           // mulps         %xmm6,%xmm6
@@ -73627,7 +73587,7 @@
   .byte  243,15,89,228                       // mulss         %xmm4,%xmm4
   .byte  15,198,228,0                        // shufps        $0x0,%xmm4,%xmm4
   .byte  15,92,196                           // subps         %xmm4,%xmm0
-  .byte  15,87,129,112,116,0,0               // xorps         0x7470(%ecx),%xmm0
+  .byte  15,87,129,200,116,0,0               // xorps         0x74c8(%ecx),%xmm0
   .byte  15,94,197                           // divps         %xmm5,%xmm0
   .byte  141,72,8                            // lea           0x8(%eax),%ecx
   .byte  131,236,8                           // sub           $0x8,%esp
@@ -73704,16 +73664,16 @@
   .byte  131,236,24                          // sub           $0x18,%esp
   .byte  15,41,93,232                        // movaps        %xmm3,-0x18(%ebp)
   .byte  15,40,218                           // movaps        %xmm2,%xmm3
-  .byte  232,0,0,0,0                         // call          6f29 <_sk_save_xy_sse2+0x12>
+  .byte  232,0,0,0,0                         // call          6ed1 <_sk_save_xy_sse2+0x12>
   .byte  88                                  // pop           %eax
-  .byte  15,40,160,135,110,0,0               // movaps        0x6e87(%eax),%xmm4
+  .byte  15,40,160,223,110,0,0               // movaps        0x6edf(%eax),%xmm4
   .byte  15,40,232                           // movaps        %xmm0,%xmm5
   .byte  15,88,236                           // addps         %xmm4,%xmm5
   .byte  243,15,91,245                       // cvttps2dq     %xmm5,%xmm6
   .byte  15,91,246                           // cvtdq2ps      %xmm6,%xmm6
   .byte  15,40,253                           // movaps        %xmm5,%xmm7
   .byte  15,194,254,1                        // cmpltps       %xmm6,%xmm7
-  .byte  15,40,144,151,110,0,0               // movaps        0x6e97(%eax),%xmm2
+  .byte  15,40,144,239,110,0,0               // movaps        0x6eef(%eax),%xmm2
   .byte  15,84,250                           // andps         %xmm2,%xmm7
   .byte  15,92,247                           // subps         %xmm7,%xmm6
   .byte  15,92,238                           // subps         %xmm6,%xmm5
@@ -73788,15 +73748,15 @@
   .byte  137,229                             // mov           %esp,%ebp
   .byte  86                                  // push          %esi
   .byte  80                                  // push          %eax
-  .byte  232,0,0,0,0                         // call          7011 <_sk_bilinear_nx_sse2+0xa>
+  .byte  232,0,0,0,0                         // call          6fb9 <_sk_bilinear_nx_sse2+0xa>
   .byte  88                                  // pop           %eax
   .byte  139,77,8                            // mov           0x8(%ebp),%ecx
   .byte  139,85,12                           // mov           0xc(%ebp),%edx
   .byte  139,50                              // mov           (%edx),%esi
   .byte  15,16,6                             // movups        (%esi),%xmm0
   .byte  15,16,166,128,0,0,0                 // movups        0x80(%esi),%xmm4
-  .byte  15,88,128,15,116,0,0                // addps         0x740f(%eax),%xmm0
-  .byte  15,40,168,175,109,0,0               // movaps        0x6daf(%eax),%xmm5
+  .byte  15,88,128,23,116,0,0                // addps         0x7417(%eax),%xmm0
+  .byte  15,40,168,7,110,0,0                 // movaps        0x6e07(%eax),%xmm5
   .byte  15,92,236                           // subps         %xmm4,%xmm5
   .byte  15,17,174,0,1,0,0                   // movups        %xmm5,0x100(%esi)
   .byte  141,66,8                            // lea           0x8(%edx),%eax
@@ -73817,14 +73777,14 @@
   .byte  137,229                             // mov           %esp,%ebp
   .byte  86                                  // push          %esi
   .byte  80                                  // push          %eax
-  .byte  232,0,0,0,0                         // call          7057 <_sk_bilinear_px_sse2+0xa>
+  .byte  232,0,0,0,0                         // call          6fff <_sk_bilinear_px_sse2+0xa>
   .byte  88                                  // pop           %eax
   .byte  139,77,8                            // mov           0x8(%ebp),%ecx
   .byte  139,85,12                           // mov           0xc(%ebp),%edx
   .byte  139,50                              // mov           (%edx),%esi
   .byte  15,16,6                             // movups        (%esi),%xmm0
   .byte  15,16,166,128,0,0,0                 // movups        0x80(%esi),%xmm4
-  .byte  15,88,128,89,109,0,0                // addps         0x6d59(%eax),%xmm0
+  .byte  15,88,128,177,109,0,0               // addps         0x6db1(%eax),%xmm0
   .byte  15,17,166,0,1,0,0                   // movups        %xmm4,0x100(%esi)
   .byte  141,66,8                            // lea           0x8(%edx),%eax
   .byte  131,236,8                           // sub           $0x8,%esp
@@ -73844,15 +73804,15 @@
   .byte  137,229                             // mov           %esp,%ebp
   .byte  86                                  // push          %esi
   .byte  80                                  // push          %eax
-  .byte  232,0,0,0,0                         // call          7093 <_sk_bilinear_ny_sse2+0xa>
+  .byte  232,0,0,0,0                         // call          703b <_sk_bilinear_ny_sse2+0xa>
   .byte  88                                  // pop           %eax
   .byte  139,77,8                            // mov           0x8(%ebp),%ecx
   .byte  139,85,12                           // mov           0xc(%ebp),%edx
   .byte  139,50                              // mov           (%edx),%esi
   .byte  15,16,78,64                         // movups        0x40(%esi),%xmm1
   .byte  15,16,166,192,0,0,0                 // movups        0xc0(%esi),%xmm4
-  .byte  15,88,136,141,115,0,0               // addps         0x738d(%eax),%xmm1
-  .byte  15,40,168,45,109,0,0                // movaps        0x6d2d(%eax),%xmm5
+  .byte  15,88,136,149,115,0,0               // addps         0x7395(%eax),%xmm1
+  .byte  15,40,168,133,109,0,0               // movaps        0x6d85(%eax),%xmm5
   .byte  15,92,236                           // subps         %xmm4,%xmm5
   .byte  15,17,174,64,1,0,0                  // movups        %xmm5,0x140(%esi)
   .byte  141,66,8                            // lea           0x8(%edx),%eax
@@ -73873,14 +73833,14 @@
   .byte  137,229                             // mov           %esp,%ebp
   .byte  86                                  // push          %esi
   .byte  80                                  // push          %eax
-  .byte  232,0,0,0,0                         // call          70da <_sk_bilinear_py_sse2+0xa>
+  .byte  232,0,0,0,0                         // call          7082 <_sk_bilinear_py_sse2+0xa>
   .byte  88                                  // pop           %eax
   .byte  139,77,8                            // mov           0x8(%ebp),%ecx
   .byte  139,85,12                           // mov           0xc(%ebp),%edx
   .byte  139,50                              // mov           (%edx),%esi
   .byte  15,16,78,64                         // movups        0x40(%esi),%xmm1
   .byte  15,16,166,192,0,0,0                 // movups        0xc0(%esi),%xmm4
-  .byte  15,88,136,214,108,0,0               // addps         0x6cd6(%eax),%xmm1
+  .byte  15,88,136,46,109,0,0                // addps         0x6d2e(%eax),%xmm1
   .byte  15,17,166,64,1,0,0                  // movups        %xmm4,0x140(%esi)
   .byte  141,66,8                            // lea           0x8(%edx),%eax
   .byte  131,236,8                           // sub           $0x8,%esp
@@ -73900,20 +73860,20 @@
   .byte  137,229                             // mov           %esp,%ebp
   .byte  86                                  // push          %esi
   .byte  80                                  // push          %eax
-  .byte  232,0,0,0,0                         // call          7117 <_sk_bicubic_n3x_sse2+0xa>
+  .byte  232,0,0,0,0                         // call          70bf <_sk_bicubic_n3x_sse2+0xa>
   .byte  88                                  // pop           %eax
   .byte  139,77,8                            // mov           0x8(%ebp),%ecx
   .byte  139,85,12                           // mov           0xc(%ebp),%edx
   .byte  139,50                              // mov           (%edx),%esi
   .byte  15,16,6                             // movups        (%esi),%xmm0
   .byte  15,16,166,128,0,0,0                 // movups        0x80(%esi),%xmm4
-  .byte  15,88,128,25,115,0,0                // addps         0x7319(%eax),%xmm0
-  .byte  15,40,168,169,108,0,0               // movaps        0x6ca9(%eax),%xmm5
+  .byte  15,88,128,33,115,0,0                // addps         0x7321(%eax),%xmm0
+  .byte  15,40,168,1,109,0,0                 // movaps        0x6d01(%eax),%xmm5
   .byte  15,92,236                           // subps         %xmm4,%xmm5
   .byte  15,40,229                           // movaps        %xmm5,%xmm4
   .byte  15,89,228                           // mulps         %xmm4,%xmm4
-  .byte  15,89,168,41,115,0,0                // mulps         0x7329(%eax),%xmm5
-  .byte  15,88,168,153,110,0,0               // addps         0x6e99(%eax),%xmm5
+  .byte  15,89,168,49,115,0,0                // mulps         0x7331(%eax),%xmm5
+  .byte  15,88,168,241,110,0,0               // addps         0x6ef1(%eax),%xmm5
   .byte  15,89,236                           // mulps         %xmm4,%xmm5
   .byte  15,17,174,0,1,0,0                   // movups        %xmm5,0x100(%esi)
   .byte  141,66,8                            // lea           0x8(%edx),%eax
@@ -73934,23 +73894,23 @@
   .byte  137,229                             // mov           %esp,%ebp
   .byte  86                                  // push          %esi
   .byte  80                                  // push          %eax
-  .byte  232,0,0,0,0                         // call          7174 <_sk_bicubic_n1x_sse2+0xa>
+  .byte  232,0,0,0,0                         // call          711c <_sk_bicubic_n1x_sse2+0xa>
   .byte  88                                  // pop           %eax
   .byte  139,77,8                            // mov           0x8(%ebp),%ecx
   .byte  139,85,12                           // mov           0xc(%ebp),%edx
   .byte  139,50                              // mov           (%edx),%esi
   .byte  15,16,6                             // movups        (%esi),%xmm0
   .byte  15,16,166,128,0,0,0                 // movups        0x80(%esi),%xmm4
-  .byte  15,88,128,172,114,0,0               // addps         0x72ac(%eax),%xmm0
-  .byte  15,40,168,76,108,0,0                // movaps        0x6c4c(%eax),%xmm5
+  .byte  15,88,128,180,114,0,0               // addps         0x72b4(%eax),%xmm0
+  .byte  15,40,168,164,108,0,0               // movaps        0x6ca4(%eax),%xmm5
   .byte  15,92,236                           // subps         %xmm4,%xmm5
-  .byte  15,40,160,220,114,0,0               // movaps        0x72dc(%eax),%xmm4
+  .byte  15,40,160,228,114,0,0               // movaps        0x72e4(%eax),%xmm4
   .byte  15,89,229                           // mulps         %xmm5,%xmm4
-  .byte  15,88,160,236,114,0,0               // addps         0x72ec(%eax),%xmm4
+  .byte  15,88,160,244,114,0,0               // addps         0x72f4(%eax),%xmm4
   .byte  15,89,229                           // mulps         %xmm5,%xmm4
-  .byte  15,88,160,60,108,0,0                // addps         0x6c3c(%eax),%xmm4
+  .byte  15,88,160,148,108,0,0               // addps         0x6c94(%eax),%xmm4
   .byte  15,89,229                           // mulps         %xmm5,%xmm4
-  .byte  15,88,160,252,114,0,0               // addps         0x72fc(%eax),%xmm4
+  .byte  15,88,160,4,115,0,0                 // addps         0x7304(%eax),%xmm4
   .byte  15,17,166,0,1,0,0                   // movups        %xmm4,0x100(%esi)
   .byte  141,66,8                            // lea           0x8(%edx),%eax
   .byte  131,236,8                           // sub           $0x8,%esp
@@ -73970,22 +73930,22 @@
   .byte  137,229                             // mov           %esp,%ebp
   .byte  86                                  // push          %esi
   .byte  80                                  // push          %eax
-  .byte  232,0,0,0,0                         // call          71df <_sk_bicubic_p1x_sse2+0xa>
+  .byte  232,0,0,0,0                         // call          7187 <_sk_bicubic_p1x_sse2+0xa>
   .byte  88                                  // pop           %eax
   .byte  139,77,8                            // mov           0x8(%ebp),%ecx
   .byte  139,85,12                           // mov           0xc(%ebp),%edx
   .byte  139,50                              // mov           (%edx),%esi
-  .byte  15,40,160,209,107,0,0               // movaps        0x6bd1(%eax),%xmm4
+  .byte  15,40,160,41,108,0,0                // movaps        0x6c29(%eax),%xmm4
   .byte  15,16,6                             // movups        (%esi),%xmm0
   .byte  15,16,174,128,0,0,0                 // movups        0x80(%esi),%xmm5
   .byte  15,88,196                           // addps         %xmm4,%xmm0
-  .byte  15,40,176,113,114,0,0               // movaps        0x7271(%eax),%xmm6
+  .byte  15,40,176,121,114,0,0               // movaps        0x7279(%eax),%xmm6
   .byte  15,89,245                           // mulps         %xmm5,%xmm6
-  .byte  15,88,176,129,114,0,0               // addps         0x7281(%eax),%xmm6
+  .byte  15,88,176,137,114,0,0               // addps         0x7289(%eax),%xmm6
   .byte  15,89,245                           // mulps         %xmm5,%xmm6
   .byte  15,88,244                           // addps         %xmm4,%xmm6
   .byte  15,89,245                           // mulps         %xmm5,%xmm6
-  .byte  15,88,176,145,114,0,0               // addps         0x7291(%eax),%xmm6
+  .byte  15,88,176,153,114,0,0               // addps         0x7299(%eax),%xmm6
   .byte  15,17,182,0,1,0,0                   // movups        %xmm6,0x100(%esi)
   .byte  141,66,8                            // lea           0x8(%edx),%eax
   .byte  131,236,8                           // sub           $0x8,%esp
@@ -74005,18 +73965,18 @@
   .byte  137,229                             // mov           %esp,%ebp
   .byte  86                                  // push          %esi
   .byte  80                                  // push          %eax
-  .byte  232,0,0,0,0                         // call          723f <_sk_bicubic_p3x_sse2+0xa>
+  .byte  232,0,0,0,0                         // call          71e7 <_sk_bicubic_p3x_sse2+0xa>
   .byte  88                                  // pop           %eax
   .byte  139,77,8                            // mov           0x8(%ebp),%ecx
   .byte  139,85,12                           // mov           0xc(%ebp),%edx
   .byte  139,50                              // mov           (%edx),%esi
   .byte  15,16,6                             // movups        (%esi),%xmm0
   .byte  15,16,166,128,0,0,0                 // movups        0x80(%esi),%xmm4
-  .byte  15,88,128,33,114,0,0                // addps         0x7221(%eax),%xmm0
+  .byte  15,88,128,41,114,0,0                // addps         0x7229(%eax),%xmm0
   .byte  15,40,236                           // movaps        %xmm4,%xmm5
   .byte  15,89,237                           // mulps         %xmm5,%xmm5
-  .byte  15,89,160,1,114,0,0                 // mulps         0x7201(%eax),%xmm4
-  .byte  15,88,160,113,109,0,0               // addps         0x6d71(%eax),%xmm4
+  .byte  15,89,160,9,114,0,0                 // mulps         0x7209(%eax),%xmm4
+  .byte  15,88,160,201,109,0,0               // addps         0x6dc9(%eax),%xmm4
   .byte  15,89,229                           // mulps         %xmm5,%xmm4
   .byte  15,17,166,0,1,0,0                   // movups        %xmm4,0x100(%esi)
   .byte  141,66,8                            // lea           0x8(%edx),%eax
@@ -74037,20 +73997,20 @@
   .byte  137,229                             // mov           %esp,%ebp
   .byte  86                                  // push          %esi
   .byte  80                                  // push          %eax
-  .byte  232,0,0,0,0                         // call          7292 <_sk_bicubic_n3y_sse2+0xa>
+  .byte  232,0,0,0,0                         // call          723a <_sk_bicubic_n3y_sse2+0xa>
   .byte  88                                  // pop           %eax
   .byte  139,77,8                            // mov           0x8(%ebp),%ecx
   .byte  139,85,12                           // mov           0xc(%ebp),%edx
   .byte  139,50                              // mov           (%edx),%esi
   .byte  15,16,78,64                         // movups        0x40(%esi),%xmm1
   .byte  15,16,166,192,0,0,0                 // movups        0xc0(%esi),%xmm4
-  .byte  15,88,136,158,113,0,0               // addps         0x719e(%eax),%xmm1
-  .byte  15,40,168,46,107,0,0                // movaps        0x6b2e(%eax),%xmm5
+  .byte  15,88,136,166,113,0,0               // addps         0x71a6(%eax),%xmm1
+  .byte  15,40,168,134,107,0,0               // movaps        0x6b86(%eax),%xmm5
   .byte  15,92,236                           // subps         %xmm4,%xmm5
   .byte  15,40,229                           // movaps        %xmm5,%xmm4
   .byte  15,89,228                           // mulps         %xmm4,%xmm4
-  .byte  15,89,168,174,113,0,0               // mulps         0x71ae(%eax),%xmm5
-  .byte  15,88,168,30,109,0,0                // addps         0x6d1e(%eax),%xmm5
+  .byte  15,89,168,182,113,0,0               // mulps         0x71b6(%eax),%xmm5
+  .byte  15,88,168,118,109,0,0               // addps         0x6d76(%eax),%xmm5
   .byte  15,89,236                           // mulps         %xmm4,%xmm5
   .byte  15,17,174,64,1,0,0                  // movups        %xmm5,0x140(%esi)
   .byte  141,66,8                            // lea           0x8(%edx),%eax
@@ -74071,23 +74031,23 @@
   .byte  137,229                             // mov           %esp,%ebp
   .byte  86                                  // push          %esi
   .byte  80                                  // push          %eax
-  .byte  232,0,0,0,0                         // call          72f0 <_sk_bicubic_n1y_sse2+0xa>
+  .byte  232,0,0,0,0                         // call          7298 <_sk_bicubic_n1y_sse2+0xa>
   .byte  88                                  // pop           %eax
   .byte  139,77,8                            // mov           0x8(%ebp),%ecx
   .byte  139,85,12                           // mov           0xc(%ebp),%edx
   .byte  139,50                              // mov           (%edx),%esi
   .byte  15,16,78,64                         // movups        0x40(%esi),%xmm1
   .byte  15,16,166,192,0,0,0                 // movups        0xc0(%esi),%xmm4
-  .byte  15,88,136,48,113,0,0                // addps         0x7130(%eax),%xmm1
-  .byte  15,40,168,208,106,0,0               // movaps        0x6ad0(%eax),%xmm5
+  .byte  15,88,136,56,113,0,0                // addps         0x7138(%eax),%xmm1
+  .byte  15,40,168,40,107,0,0                // movaps        0x6b28(%eax),%xmm5
   .byte  15,92,236                           // subps         %xmm4,%xmm5
-  .byte  15,40,160,96,113,0,0                // movaps        0x7160(%eax),%xmm4
+  .byte  15,40,160,104,113,0,0               // movaps        0x7168(%eax),%xmm4
   .byte  15,89,229                           // mulps         %xmm5,%xmm4
-  .byte  15,88,160,112,113,0,0               // addps         0x7170(%eax),%xmm4
+  .byte  15,88,160,120,113,0,0               // addps         0x7178(%eax),%xmm4
   .byte  15,89,229                           // mulps         %xmm5,%xmm4
-  .byte  15,88,160,192,106,0,0               // addps         0x6ac0(%eax),%xmm4
+  .byte  15,88,160,24,107,0,0                // addps         0x6b18(%eax),%xmm4
   .byte  15,89,229                           // mulps         %xmm5,%xmm4
-  .byte  15,88,160,128,113,0,0               // addps         0x7180(%eax),%xmm4
+  .byte  15,88,160,136,113,0,0               // addps         0x7188(%eax),%xmm4
   .byte  15,17,166,64,1,0,0                  // movups        %xmm4,0x140(%esi)
   .byte  141,66,8                            // lea           0x8(%edx),%eax
   .byte  131,236,8                           // sub           $0x8,%esp
@@ -74107,22 +74067,22 @@
   .byte  137,229                             // mov           %esp,%ebp
   .byte  86                                  // push          %esi
   .byte  80                                  // push          %eax
-  .byte  232,0,0,0,0                         // call          735c <_sk_bicubic_p1y_sse2+0xa>
+  .byte  232,0,0,0,0                         // call          7304 <_sk_bicubic_p1y_sse2+0xa>
   .byte  88                                  // pop           %eax
   .byte  139,77,8                            // mov           0x8(%ebp),%ecx
   .byte  139,85,12                           // mov           0xc(%ebp),%edx
   .byte  139,50                              // mov           (%edx),%esi
-  .byte  15,40,160,84,106,0,0                // movaps        0x6a54(%eax),%xmm4
+  .byte  15,40,160,172,106,0,0               // movaps        0x6aac(%eax),%xmm4
   .byte  15,16,78,64                         // movups        0x40(%esi),%xmm1
   .byte  15,16,174,192,0,0,0                 // movups        0xc0(%esi),%xmm5
   .byte  15,88,204                           // addps         %xmm4,%xmm1
-  .byte  15,40,176,244,112,0,0               // movaps        0x70f4(%eax),%xmm6
+  .byte  15,40,176,252,112,0,0               // movaps        0x70fc(%eax),%xmm6
   .byte  15,89,245                           // mulps         %xmm5,%xmm6
-  .byte  15,88,176,4,113,0,0                 // addps         0x7104(%eax),%xmm6
+  .byte  15,88,176,12,113,0,0                // addps         0x710c(%eax),%xmm6
   .byte  15,89,245                           // mulps         %xmm5,%xmm6
   .byte  15,88,244                           // addps         %xmm4,%xmm6
   .byte  15,89,245                           // mulps         %xmm5,%xmm6
-  .byte  15,88,176,20,113,0,0                // addps         0x7114(%eax),%xmm6
+  .byte  15,88,176,28,113,0,0                // addps         0x711c(%eax),%xmm6
   .byte  15,17,182,64,1,0,0                  // movups        %xmm6,0x140(%esi)
   .byte  141,66,8                            // lea           0x8(%edx),%eax
   .byte  131,236,8                           // sub           $0x8,%esp
@@ -74142,18 +74102,18 @@
   .byte  137,229                             // mov           %esp,%ebp
   .byte  86                                  // push          %esi
   .byte  80                                  // push          %eax
-  .byte  232,0,0,0,0                         // call          73bd <_sk_bicubic_p3y_sse2+0xa>
+  .byte  232,0,0,0,0                         // call          7365 <_sk_bicubic_p3y_sse2+0xa>
   .byte  88                                  // pop           %eax
   .byte  139,77,8                            // mov           0x8(%ebp),%ecx
   .byte  139,85,12                           // mov           0xc(%ebp),%edx
   .byte  139,50                              // mov           (%edx),%esi
   .byte  15,16,78,64                         // movups        0x40(%esi),%xmm1
   .byte  15,16,166,192,0,0,0                 // movups        0xc0(%esi),%xmm4
-  .byte  15,88,136,163,112,0,0               // addps         0x70a3(%eax),%xmm1
+  .byte  15,88,136,171,112,0,0               // addps         0x70ab(%eax),%xmm1
   .byte  15,40,236                           // movaps        %xmm4,%xmm5
   .byte  15,89,237                           // mulps         %xmm5,%xmm5
-  .byte  15,89,160,131,112,0,0               // mulps         0x7083(%eax),%xmm4
-  .byte  15,88,160,243,107,0,0               // addps         0x6bf3(%eax),%xmm4
+  .byte  15,89,160,139,112,0,0               // mulps         0x708b(%eax),%xmm4
+  .byte  15,88,160,75,108,0,0                // addps         0x6c4b(%eax),%xmm4
   .byte  15,89,229                           // mulps         %xmm5,%xmm4
   .byte  15,17,166,64,1,0,0                  // movups        %xmm4,0x140(%esi)
   .byte  141,66,8                            // lea           0x8(%edx),%eax
@@ -74296,7 +74256,7 @@
   .byte  102,15,112,192,232                  // pshufd        $0xe8,%xmm0,%xmm0
   .byte  102,15,98,200                       // punpckldq     %xmm0,%xmm1
   .byte  102,15,127,77,136                   // movdqa        %xmm1,-0x78(%ebp)
-  .byte  232,0,0,0,0                         // call          75a3 <_sk_clut_3D_sse2+0xee>
+  .byte  232,0,0,0,0                         // call          754b <_sk_clut_3D_sse2+0xee>
   .byte  90                                  // pop           %edx
   .byte  243,15,91,196                       // cvttps2dq     %xmm4,%xmm0
   .byte  102,15,127,133,8,255,255,255        // movdqa        %xmm0,-0xf8(%ebp)
@@ -74304,7 +74264,7 @@
   .byte  102,15,254,216                      // paddd         %xmm0,%xmm3
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,15,254,195                      // paddd         %xmm3,%xmm0
-  .byte  102,15,111,162,237,110,0,0          // movdqa        0x6eed(%edx),%xmm4
+  .byte  102,15,111,162,245,110,0,0          // movdqa        0x6ef5(%edx),%xmm4
   .byte  102,15,127,165,120,255,255,255      // movdqa        %xmm4,-0x88(%ebp)
   .byte  102,15,112,200,245                  // pshufd        $0xf5,%xmm0,%xmm1
   .byte  102,15,244,196                      // pmuludq       %xmm4,%xmm0
@@ -74344,7 +74304,7 @@
   .byte  243,15,16,4,129                     // movss         (%ecx,%eax,4),%xmm0
   .byte  15,20,248                           // unpcklps      %xmm0,%xmm7
   .byte  102,15,20,252                       // unpcklpd      %xmm4,%xmm7
-  .byte  102,15,111,130,93,104,0,0           // movdqa        0x685d(%edx),%xmm0
+  .byte  102,15,111,130,181,104,0,0          // movdqa        0x68b5(%edx),%xmm0
   .byte  102,15,127,133,104,255,255,255      // movdqa        %xmm0,-0x98(%ebp)
   .byte  102,15,254,200                      // paddd         %xmm0,%xmm1
   .byte  102,15,112,193,229                  // pshufd        $0xe5,%xmm1,%xmm0
@@ -74361,7 +74321,7 @@
   .byte  243,15,16,4,129                     // movss         (%ecx,%eax,4),%xmm0
   .byte  15,20,240                           // unpcklps      %xmm0,%xmm6
   .byte  102,15,20,241                       // unpcklpd      %xmm1,%xmm6
-  .byte  15,40,130,221,110,0,0               // movaps        0x6edd(%edx),%xmm0
+  .byte  15,40,130,229,110,0,0               // movaps        0x6ee5(%edx),%xmm0
   .byte  15,40,141,248,254,255,255           // movaps        -0x108(%ebp),%xmm1
   .byte  15,88,200                           // addps         %xmm0,%xmm1
   .byte  15,41,77,200                        // movaps        %xmm1,-0x38(%ebp)
@@ -74981,7 +74941,7 @@
   .byte  102,15,112,201,232                  // pshufd        $0xe8,%xmm1,%xmm1
   .byte  102,15,98,217                       // punpckldq     %xmm1,%xmm3
   .byte  102,15,127,93,152                   // movdqa        %xmm3,-0x68(%ebp)
-  .byte  232,0,0,0,0                         // call          812f <_sk_clut_4D_sse2+0x165>
+  .byte  232,0,0,0,0                         // call          80d7 <_sk_clut_4D_sse2+0x165>
   .byte  89                                  // pop           %ecx
   .byte  15,40,197                           // movaps        %xmm5,%xmm0
   .byte  15,41,133,168,254,255,255           // movaps        %xmm0,-0x158(%ebp)
@@ -74992,7 +74952,7 @@
   .byte  102,15,111,212                      // movdqa        %xmm4,%xmm2
   .byte  102,15,254,214                      // paddd         %xmm6,%xmm2
   .byte  102,15,254,218                      // paddd         %xmm2,%xmm3
-  .byte  102,15,111,161,97,99,0,0            // movdqa        0x6361(%ecx),%xmm4
+  .byte  102,15,111,161,105,99,0,0           // movdqa        0x6369(%ecx),%xmm4
   .byte  102,15,112,203,245                  // pshufd        $0xf5,%xmm3,%xmm1
   .byte  102,15,244,220                      // pmuludq       %xmm4,%xmm3
   .byte  102,15,244,204                      // pmuludq       %xmm4,%xmm1
@@ -75034,7 +74994,7 @@
   .byte  15,20,223                           // unpcklps      %xmm7,%xmm3
   .byte  102,15,20,222                       // unpcklpd      %xmm6,%xmm3
   .byte  102,15,41,157,232,254,255,255       // movapd        %xmm3,-0x118(%ebp)
-  .byte  102,15,254,137,209,92,0,0           // paddd         0x5cd1(%ecx),%xmm1
+  .byte  102,15,254,137,41,93,0,0            // paddd         0x5d29(%ecx),%xmm1
   .byte  102,15,112,241,229                  // pshufd        $0xe5,%xmm1,%xmm6
   .byte  102,15,126,240                      // movd          %xmm6,%eax
   .byte  102,15,112,241,78                   // pshufd        $0x4e,%xmm1,%xmm6
@@ -75049,7 +75009,7 @@
   .byte  243,15,16,60,130                    // movss         (%edx,%eax,4),%xmm7
   .byte  15,20,223                           // unpcklps      %xmm7,%xmm3
   .byte  102,15,20,222                       // unpcklpd      %xmm6,%xmm3
-  .byte  15,40,185,81,99,0,0                 // movaps        0x6351(%ecx),%xmm7
+  .byte  15,40,185,89,99,0,0                 // movaps        0x6359(%ecx),%xmm7
   .byte  15,88,199                           // addps         %xmm7,%xmm0
   .byte  15,41,133,120,255,255,255           // movaps        %xmm0,-0x88(%ebp)
   .byte  15,40,133,248,254,255,255           // movaps        -0x108(%ebp),%xmm0
@@ -75070,7 +75030,7 @@
   .byte  102,15,127,101,168                  // movdqa        %xmm4,-0x58(%ebp)
   .byte  102,15,254,212                      // paddd         %xmm4,%xmm2
   .byte  102,15,112,194,245                  // pshufd        $0xf5,%xmm2,%xmm0
-  .byte  102,15,111,161,97,99,0,0            // movdqa        0x6361(%ecx),%xmm4
+  .byte  102,15,111,161,105,99,0,0           // movdqa        0x6369(%ecx),%xmm4
   .byte  102,15,244,212                      // pmuludq       %xmm4,%xmm2
   .byte  102,15,244,196                      // pmuludq       %xmm4,%xmm0
   .byte  102,15,112,240,232                  // pshufd        $0xe8,%xmm0,%xmm6
@@ -75106,7 +75066,7 @@
   .byte  243,15,16,44,130                    // movss         (%edx,%eax,4),%xmm5
   .byte  15,20,213                           // unpcklps      %xmm5,%xmm2
   .byte  102,15,20,214                       // unpcklpd      %xmm6,%xmm2
-  .byte  102,15,254,129,209,92,0,0           // paddd         0x5cd1(%ecx),%xmm0
+  .byte  102,15,254,129,41,93,0,0            // paddd         0x5d29(%ecx),%xmm0
   .byte  102,15,112,232,229                  // pshufd        $0xe5,%xmm0,%xmm5
   .byte  102,15,126,232                      // movd          %xmm5,%eax
   .byte  102,15,112,232,78                   // pshufd        $0x4e,%xmm0,%xmm5
@@ -75190,7 +75150,7 @@
   .byte  243,15,16,44,130                    // movss         (%edx,%eax,4),%xmm5
   .byte  15,20,245                           // unpcklps      %xmm5,%xmm6
   .byte  102,15,20,240                       // unpcklpd      %xmm0,%xmm6
-  .byte  102,15,254,137,209,92,0,0           // paddd         0x5cd1(%ecx),%xmm1
+  .byte  102,15,254,137,41,93,0,0            // paddd         0x5d29(%ecx),%xmm1
   .byte  102,15,112,193,229                  // pshufd        $0xe5,%xmm1,%xmm0
   .byte  102,15,126,192                      // movd          %xmm0,%eax
   .byte  102,15,112,193,78                   // pshufd        $0x4e,%xmm1,%xmm0
@@ -75208,7 +75168,7 @@
   .byte  102,15,111,202                      // movdqa        %xmm2,%xmm1
   .byte  102,15,254,77,168                   // paddd         -0x58(%ebp),%xmm1
   .byte  102,15,112,193,245                  // pshufd        $0xf5,%xmm1,%xmm0
-  .byte  102,15,111,145,97,99,0,0            // movdqa        0x6361(%ecx),%xmm2
+  .byte  102,15,111,145,105,99,0,0           // movdqa        0x6369(%ecx),%xmm2
   .byte  102,15,244,202                      // pmuludq       %xmm2,%xmm1
   .byte  102,15,244,194                      // pmuludq       %xmm2,%xmm0
   .byte  102,15,112,192,232                  // pshufd        $0xe8,%xmm0,%xmm0
@@ -75244,7 +75204,7 @@
   .byte  243,15,16,4,130                     // movss         (%edx,%eax,4),%xmm0
   .byte  15,20,216                           // unpcklps      %xmm0,%xmm3
   .byte  102,15,20,217                       // unpcklpd      %xmm1,%xmm3
-  .byte  102,15,254,169,209,92,0,0           // paddd         0x5cd1(%ecx),%xmm5
+  .byte  102,15,254,169,41,93,0,0            // paddd         0x5d29(%ecx),%xmm5
   .byte  102,15,112,197,229                  // pshufd        $0xe5,%xmm5,%xmm0
   .byte  102,15,126,192                      // movd          %xmm0,%eax
   .byte  102,15,112,197,78                   // pshufd        $0x4e,%xmm5,%xmm0
@@ -75304,7 +75264,7 @@
   .byte  102,15,111,209                      // movdqa        %xmm1,%xmm2
   .byte  102,15,254,85,152                   // paddd         -0x68(%ebp),%xmm2
   .byte  102,15,112,194,245                  // pshufd        $0xf5,%xmm2,%xmm0
-  .byte  102,15,111,153,97,99,0,0            // movdqa        0x6361(%ecx),%xmm3
+  .byte  102,15,111,153,105,99,0,0           // movdqa        0x6369(%ecx),%xmm3
   .byte  102,15,244,211                      // pmuludq       %xmm3,%xmm2
   .byte  102,15,244,195                      // pmuludq       %xmm3,%xmm0
   .byte  102,15,111,251                      // movdqa        %xmm3,%xmm7
@@ -75343,7 +75303,7 @@
   .byte  243,15,16,44,130                    // movss         (%edx,%eax,4),%xmm5
   .byte  15,20,229                           // unpcklps      %xmm5,%xmm4
   .byte  102,15,20,227                       // unpcklpd      %xmm3,%xmm4
-  .byte  102,15,254,129,209,92,0,0           // paddd         0x5cd1(%ecx),%xmm0
+  .byte  102,15,254,129,41,93,0,0            // paddd         0x5d29(%ecx),%xmm0
   .byte  102,15,112,216,229                  // pshufd        $0xe5,%xmm0,%xmm3
   .byte  102,15,126,216                      // movd          %xmm3,%eax
   .byte  102,15,112,216,78                   // pshufd        $0x4e,%xmm0,%xmm3
@@ -75395,7 +75355,7 @@
   .byte  243,15,16,52,130                    // movss         (%edx,%eax,4),%xmm6
   .byte  15,20,238                           // unpcklps      %xmm6,%xmm5
   .byte  102,15,20,235                       // unpcklpd      %xmm3,%xmm5
-  .byte  102,15,111,137,209,92,0,0           // movdqa        0x5cd1(%ecx),%xmm1
+  .byte  102,15,111,137,41,93,0,0            // movdqa        0x5d29(%ecx),%xmm1
   .byte  102,15,254,249                      // paddd         %xmm1,%xmm7
   .byte  102,15,112,223,229                  // pshufd        $0xe5,%xmm7,%xmm3
   .byte  102,15,126,216                      // movd          %xmm3,%eax
@@ -75429,7 +75389,7 @@
   .byte  102,15,111,226                      // movdqa        %xmm2,%xmm4
   .byte  102,15,254,69,152                   // paddd         -0x68(%ebp),%xmm0
   .byte  102,15,112,208,245                  // pshufd        $0xf5,%xmm0,%xmm2
-  .byte  102,15,111,153,97,99,0,0            // movdqa        0x6361(%ecx),%xmm3
+  .byte  102,15,111,153,105,99,0,0           // movdqa        0x6369(%ecx),%xmm3
   .byte  102,15,244,195                      // pmuludq       %xmm3,%xmm0
   .byte  102,15,244,211                      // pmuludq       %xmm3,%xmm2
   .byte  102,15,112,218,232                  // pshufd        $0xe8,%xmm2,%xmm3
@@ -75486,7 +75446,7 @@
   .byte  102,15,111,212                      // movdqa        %xmm4,%xmm2
   .byte  102,15,254,85,168                   // paddd         -0x58(%ebp),%xmm2
   .byte  102,15,112,194,245                  // pshufd        $0xf5,%xmm2,%xmm0
-  .byte  102,15,111,137,97,99,0,0            // movdqa        0x6361(%ecx),%xmm1
+  .byte  102,15,111,137,105,99,0,0           // movdqa        0x6369(%ecx),%xmm1
   .byte  102,15,244,209                      // pmuludq       %xmm1,%xmm2
   .byte  102,15,244,193                      // pmuludq       %xmm1,%xmm0
   .byte  102,15,112,240,232                  // pshufd        $0xe8,%xmm0,%xmm6
@@ -75523,7 +75483,7 @@
   .byte  243,15,16,36,130                    // movss         (%edx,%eax,4),%xmm4
   .byte  15,20,212                           // unpcklps      %xmm4,%xmm2
   .byte  102,15,20,214                       // unpcklpd      %xmm6,%xmm2
-  .byte  102,15,254,129,209,92,0,0           // paddd         0x5cd1(%ecx),%xmm0
+  .byte  102,15,254,129,41,93,0,0            // paddd         0x5d29(%ecx),%xmm0
   .byte  102,15,112,224,229                  // pshufd        $0xe5,%xmm0,%xmm4
   .byte  102,15,126,224                      // movd          %xmm4,%eax
   .byte  102,15,112,224,78                   // pshufd        $0x4e,%xmm0,%xmm4
@@ -75591,7 +75551,7 @@
   .byte  102,15,111,93,152                   // movdqa        -0x68(%ebp),%xmm3
   .byte  102,15,254,217                      // paddd         %xmm1,%xmm3
   .byte  102,15,112,211,245                  // pshufd        $0xf5,%xmm3,%xmm2
-  .byte  102,15,111,129,97,99,0,0            // movdqa        0x6361(%ecx),%xmm0
+  .byte  102,15,111,129,105,99,0,0           // movdqa        0x6369(%ecx),%xmm0
   .byte  102,15,244,216                      // pmuludq       %xmm0,%xmm3
   .byte  102,15,244,208                      // pmuludq       %xmm0,%xmm2
   .byte  102,15,111,248                      // movdqa        %xmm0,%xmm7
@@ -75630,7 +75590,7 @@
   .byte  243,15,16,28,130                    // movss         (%edx,%eax,4),%xmm3
   .byte  15,20,235                           // unpcklps      %xmm3,%xmm5
   .byte  102,15,20,238                       // unpcklpd      %xmm6,%xmm5
-  .byte  102,15,254,145,209,92,0,0           // paddd         0x5cd1(%ecx),%xmm2
+  .byte  102,15,254,145,41,93,0,0            // paddd         0x5d29(%ecx),%xmm2
   .byte  102,15,112,218,229                  // pshufd        $0xe5,%xmm2,%xmm3
   .byte  102,15,126,216                      // movd          %xmm3,%eax
   .byte  102,15,112,218,78                   // pshufd        $0x4e,%xmm2,%xmm3
@@ -75682,7 +75642,7 @@
   .byte  243,15,16,4,130                     // movss         (%edx,%eax,4),%xmm0
   .byte  15,20,208                           // unpcklps      %xmm0,%xmm2
   .byte  102,15,20,215                       // unpcklpd      %xmm7,%xmm2
-  .byte  102,15,111,137,209,92,0,0           // movdqa        0x5cd1(%ecx),%xmm1
+  .byte  102,15,111,137,41,93,0,0            // movdqa        0x5d29(%ecx),%xmm1
   .byte  102,15,254,217                      // paddd         %xmm1,%xmm3
   .byte  102,15,112,195,229                  // pshufd        $0xe5,%xmm3,%xmm0
   .byte  102,15,126,192                      // movd          %xmm0,%eax
@@ -75717,7 +75677,7 @@
   .byte  102,15,111,194                      // movdqa        %xmm2,%xmm0
   .byte  102,15,254,69,152                   // paddd         -0x68(%ebp),%xmm0
   .byte  102,15,112,224,245                  // pshufd        $0xf5,%xmm0,%xmm4
-  .byte  102,15,111,169,97,99,0,0            // movdqa        0x6361(%ecx),%xmm5
+  .byte  102,15,111,169,105,99,0,0           // movdqa        0x6369(%ecx),%xmm5
   .byte  102,15,111,221                      // movdqa        %xmm5,%xmm3
   .byte  102,15,244,195                      // pmuludq       %xmm3,%xmm0
   .byte  102,15,244,227                      // pmuludq       %xmm3,%xmm4
@@ -75773,7 +75733,7 @@
   .byte  102,15,111,202                      // movdqa        %xmm2,%xmm1
   .byte  102,15,254,77,168                   // paddd         -0x58(%ebp),%xmm1
   .byte  102,15,112,193,245                  // pshufd        $0xf5,%xmm1,%xmm0
-  .byte  102,15,111,145,97,99,0,0            // movdqa        0x6361(%ecx),%xmm2
+  .byte  102,15,111,145,105,99,0,0           // movdqa        0x6369(%ecx),%xmm2
   .byte  102,15,244,202                      // pmuludq       %xmm2,%xmm1
   .byte  102,15,244,194                      // pmuludq       %xmm2,%xmm0
   .byte  102,15,112,192,232                  // pshufd        $0xe8,%xmm0,%xmm0
@@ -75809,7 +75769,7 @@
   .byte  243,15,16,4,130                     // movss         (%edx,%eax,4),%xmm0
   .byte  15,20,216                           // unpcklps      %xmm0,%xmm3
   .byte  102,15,20,217                       // unpcklpd      %xmm1,%xmm3
-  .byte  102,15,254,185,209,92,0,0           // paddd         0x5cd1(%ecx),%xmm7
+  .byte  102,15,254,185,41,93,0,0            // paddd         0x5d29(%ecx),%xmm7
   .byte  102,15,112,199,229                  // pshufd        $0xe5,%xmm7,%xmm0
   .byte  102,15,126,192                      // movd          %xmm0,%eax
   .byte  102,15,112,199,78                   // pshufd        $0x4e,%xmm7,%xmm0
@@ -75859,7 +75819,7 @@
   .byte  102,15,111,233                      // movdqa        %xmm1,%xmm5
   .byte  102,15,254,69,152                   // paddd         -0x68(%ebp),%xmm0
   .byte  102,15,112,200,245                  // pshufd        $0xf5,%xmm0,%xmm1
-  .byte  102,15,111,145,97,99,0,0            // movdqa        0x6361(%ecx),%xmm2
+  .byte  102,15,111,145,105,99,0,0           // movdqa        0x6369(%ecx),%xmm2
   .byte  102,15,244,194                      // pmuludq       %xmm2,%xmm0
   .byte  102,15,244,202                      // pmuludq       %xmm2,%xmm1
   .byte  102,15,111,250                      // movdqa        %xmm2,%xmm7
@@ -75898,7 +75858,7 @@
   .byte  243,15,16,36,130                    // movss         (%edx,%eax,4),%xmm4
   .byte  15,20,204                           // unpcklps      %xmm4,%xmm1
   .byte  102,15,20,203                       // unpcklpd      %xmm3,%xmm1
-  .byte  102,15,254,145,209,92,0,0           // paddd         0x5cd1(%ecx),%xmm2
+  .byte  102,15,254,145,41,93,0,0            // paddd         0x5d29(%ecx),%xmm2
   .byte  102,15,112,218,229                  // pshufd        $0xe5,%xmm2,%xmm3
   .byte  102,15,126,216                      // movd          %xmm3,%eax
   .byte  102,15,112,218,78                   // pshufd        $0x4e,%xmm2,%xmm3
@@ -75952,7 +75912,7 @@
   .byte  243,15,16,36,130                    // movss         (%edx,%eax,4),%xmm4
   .byte  15,20,252                           // unpcklps      %xmm4,%xmm7
   .byte  102,15,20,254                       // unpcklpd      %xmm6,%xmm7
-  .byte  102,15,254,153,209,92,0,0           // paddd         0x5cd1(%ecx),%xmm3
+  .byte  102,15,254,153,41,93,0,0            // paddd         0x5d29(%ecx),%xmm3
   .byte  102,15,112,227,229                  // pshufd        $0xe5,%xmm3,%xmm4
   .byte  102,15,126,224                      // movd          %xmm4,%eax
   .byte  102,15,112,227,78                   // pshufd        $0x4e,%xmm3,%xmm4
@@ -75985,7 +75945,7 @@
   .byte  102,15,254,208                      // paddd         %xmm0,%xmm2
   .byte  102,15,111,216                      // movdqa        %xmm0,%xmm3
   .byte  102,15,112,194,245                  // pshufd        $0xf5,%xmm2,%xmm0
-  .byte  102,15,111,137,97,99,0,0            // movdqa        0x6361(%ecx),%xmm1
+  .byte  102,15,111,137,105,99,0,0           // movdqa        0x6369(%ecx),%xmm1
   .byte  102,15,244,209                      // pmuludq       %xmm1,%xmm2
   .byte  102,15,244,193                      // pmuludq       %xmm1,%xmm0
   .byte  102,15,111,241                      // movdqa        %xmm1,%xmm6
@@ -76025,7 +75985,7 @@
   .byte  15,20,225                           // unpcklps      %xmm1,%xmm4
   .byte  102,15,20,226                       // unpcklpd      %xmm2,%xmm4
   .byte  102,15,41,165,88,255,255,255        // movapd        %xmm4,-0xa8(%ebp)
-  .byte  102,15,254,129,209,92,0,0           // paddd         0x5cd1(%ecx),%xmm0
+  .byte  102,15,254,129,41,93,0,0            // paddd         0x5d29(%ecx),%xmm0
   .byte  102,15,112,200,229                  // pshufd        $0xe5,%xmm0,%xmm1
   .byte  102,15,126,200                      // movd          %xmm1,%eax
   .byte  102,15,112,200,78                   // pshufd        $0x4e,%xmm0,%xmm1
@@ -76079,7 +76039,7 @@
   .byte  243,15,16,52,130                    // movss         (%edx,%eax,4),%xmm6
   .byte  15,20,206                           // unpcklps      %xmm6,%xmm1
   .byte  102,15,20,203                       // unpcklpd      %xmm3,%xmm1
-  .byte  102,15,254,145,209,92,0,0           // paddd         0x5cd1(%ecx),%xmm2
+  .byte  102,15,254,145,41,93,0,0            // paddd         0x5d29(%ecx),%xmm2
   .byte  102,15,112,218,229                  // pshufd        $0xe5,%xmm2,%xmm3
   .byte  102,15,126,216                      // movd          %xmm3,%eax
   .byte  102,15,112,218,78                   // pshufd        $0x4e,%xmm2,%xmm3
@@ -76148,7 +76108,7 @@
   .byte  139,85,12                           // mov           0xc(%ebp),%edx
   .byte  141,66,8                            // lea           0x8(%edx),%eax
   .byte  131,236,8                           // sub           $0x8,%esp
-  .byte  15,40,153,145,92,0,0                // movaps        0x5c91(%ecx),%xmm3
+  .byte  15,40,153,233,92,0,0                // movaps        0x5ce9(%ecx),%xmm3
   .byte  80                                  // push          %eax
   .byte  255,117,8                           // pushl         0x8(%ebp)
   .byte  255,82,4                            // call          *0x4(%edx)
@@ -76158,6 +76118,37 @@
   .byte  91                                  // pop           %ebx
   .byte  93                                  // pop           %ebp
   .byte  195                                 // ret
+
+HIDDEN _sk_gauss_a_to_rgba_sse2
+.globl _sk_gauss_a_to_rgba_sse2
+FUNCTION(_sk_gauss_a_to_rgba_sse2)
+_sk_gauss_a_to_rgba_sse2:
+  .byte  85                                  // push          %ebp
+  .byte  137,229                             // mov           %esp,%ebp
+  .byte  131,236,8                           // sub           $0x8,%esp
+  .byte  232,0,0,0,0                         // call          952c <_sk_gauss_a_to_rgba_sse2+0xb>
+  .byte  88                                  // pop           %eax
+  .byte  139,77,12                           // mov           0xc(%ebp),%ecx
+  .byte  15,40,128,36,79,0,0                 // movaps        0x4f24(%eax),%xmm0
+  .byte  15,89,195                           // mulps         %xmm3,%xmm0
+  .byte  15,88,128,52,79,0,0                 // addps         0x4f34(%eax),%xmm0
+  .byte  15,89,195                           // mulps         %xmm3,%xmm0
+  .byte  15,88,128,68,79,0,0                 // addps         0x4f44(%eax),%xmm0
+  .byte  15,89,195                           // mulps         %xmm3,%xmm0
+  .byte  15,88,128,84,79,0,0                 // addps         0x4f54(%eax),%xmm0
+  .byte  15,89,195                           // mulps         %xmm3,%xmm0
+  .byte  15,88,128,100,79,0,0                // addps         0x4f64(%eax),%xmm0
+  .byte  141,65,4                            // lea           0x4(%ecx),%eax
+  .byte  131,236,8                           // sub           $0x8,%esp
+  .byte  15,40,200                           // movaps        %xmm0,%xmm1
+  .byte  15,40,208                           // movaps        %xmm0,%xmm2
+  .byte  15,40,216                           // movaps        %xmm0,%xmm3
+  .byte  80                                  // push          %eax
+  .byte  255,117,8                           // pushl         0x8(%ebp)
+  .byte  255,17                              // call          *(%ecx)
+  .byte  131,196,24                          // add           $0x18,%esp
+  .byte  93                                  // pop           %ebp
+  .byte  195                                 // ret
   .byte  144                                 // nop
   .byte  144                                 // nop
   .byte  144                                 // nop
@@ -82226,47 +82217,20 @@
   .byte  0,0                                 // add           %al,(%eax)
   .byte  211,128,0,0,211,128                 // roll          %cl,-0x7f2d0000(%eax)
   .byte  0,0                                 // add           %al,(%eax)
-  .byte  211,128,0,0,211,45                  // roll          %cl,0x2dd30000(%eax)
-  .byte  16,17                               // adc           %dl,(%ecx)
-  .byte  192,45,16,17,192,45,16              // shrb          $0x10,0x2dc01110
-  .byte  17,192                              // adc           %eax,%eax
-  .byte  45,16,17,192,18                     // sub           $0x12c01110,%eax
-  .byte  120,57                              // js            e3ac <.literal16+0x5fc>
-  .byte  64                                  // inc           %eax
-  .byte  18,120,57                           // adc           0x39(%eax),%bh
-  .byte  64                                  // inc           %eax
-  .byte  18,120,57                           // adc           0x39(%eax),%bh
-  .byte  64                                  // inc           %eax
-  .byte  18,120,57                           // adc           0x39(%eax),%bh
-  .byte  64                                  // inc           %eax
-  .byte  32,148,90,62,32,148,90              // and           %dl,0x5a94203e(%edx,%ebx,2)
-  .byte  62,32,148,90,62,32,148,90           // and           %dl,%ds:0x5a94203e(%edx,%ebx,2)
-  .byte  62,4,157                            // ds            add $0x9d,%al
-  .byte  30                                  // push          %ds
-  .byte  62,4,157                            // ds            add $0x9d,%al
-  .byte  30                                  // push          %ds
-  .byte  62,4,157                            // ds            add $0x9d,%al
-  .byte  30                                  // push          %ds
-  .byte  62,4,157                            // ds            add $0x9d,%al
-  .byte  30                                  // push          %ds
-  .byte  62,0,24                             // add           %bl,%ds:(%eax)
-  .byte  161,57,0,24,161                     // mov           0xa1180039,%eax
-  .byte  57,0                                // cmp           %eax,(%eax)
-  .byte  24,161,57,0,24,161                  // sbb           %ah,-0x5ee7ffc7(%ecx)
-  .byte  57,111,43                           // cmp           %ebp,0x2b(%edi)
-  .byte  231,187                             // out           %eax,$0xbb
-  .byte  111                                 // outsl         %ds:(%esi),(%dx)
+  .byte  211,128,0,0,211,111                 // roll          %cl,0x6fd30000(%eax)
   .byte  43,231                              // sub           %edi,%esp
   .byte  187,111,43,231,187                  // mov           $0xbbe72b6f,%ebx
   .byte  111                                 // outsl         %ds:(%esi),(%dx)
   .byte  43,231                              // sub           %edi,%esp
-  .byte  187,159,215,202,60                  // mov           $0x3ccad79f,%ebx
+  .byte  187,111,43,231,187                  // mov           $0xbbe72b6f,%ebx
   .byte  159                                 // lahf
   .byte  215                                 // xlat          %ds:(%ebx)
   .byte  202,60,159                          // lret          $0x9f3c
   .byte  215                                 // xlat          %ds:(%ebx)
   .byte  202,60,159                          // lret          $0x9f3c
   .byte  215                                 // xlat          %ds:(%ebx)
+  .byte  202,60,159                          // lret          $0x9f3c
+  .byte  215                                 // xlat          %ds:(%ebx)
   .byte  202,60,212                          // lret          $0xd43c
   .byte  100,84                              // fs            push %esp
   .byte  189,212,100,84,189                  // mov           $0xbd5464d4,%ebp
@@ -82302,13 +82266,13 @@
   .byte  192,191,0,0,192,191,0               // sarb          $0x0,-0x40400000(%edi)
   .byte  0,192                               // add           %al,%al
   .byte  191,0,0,192,191                     // mov           $0xbfc00000,%edi
-  .byte  114,28                              // jb            e45e <.literal16+0x6ae>
+  .byte  114,28                              // jb            e40e <.literal16+0x65e>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         e462 <.literal16+0x6b2>
+  .byte  62,114,28                           // jb,pt         e412 <.literal16+0x662>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         e466 <.literal16+0x6b6>
+  .byte  62,114,28                           // jb,pt         e416 <.literal16+0x666>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         e46a <.literal16+0x6ba>
+  .byte  62,114,28                           // jb,pt         e41a <.literal16+0x66a>
   .byte  199                                 // (bad)
   .byte  62,85                               // ds            push %ebp
   .byte  85                                  // push          %ebp
@@ -82329,13 +82293,13 @@
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%esi)
   .byte  99,61,57,142,99,61                  // arpl          %di,0x3d638e39
   .byte  57,142,99,61,114,249                // cmp           %ecx,-0x68dc29d(%esi)
-  .byte  127,63                              // jg            e4c3 <.literal16+0x713>
-  .byte  114,249                             // jb            e47f <.literal16+0x6cf>
-  .byte  127,63                              // jg            e4c7 <.literal16+0x717>
-  .byte  114,249                             // jb            e483 <.literal16+0x6d3>
-  .byte  127,63                              // jg            e4cb <.literal16+0x71b>
-  .byte  114,249                             // jb            e487 <.literal16+0x6d7>
-  .byte  127,63                              // jg            e4cf <.literal16+0x71f>
+  .byte  127,63                              // jg            e473 <.literal16+0x6c3>
+  .byte  114,249                             // jb            e42f <.literal16+0x67f>
+  .byte  127,63                              // jg            e477 <.literal16+0x6c7>
+  .byte  114,249                             // jb            e433 <.literal16+0x683>
+  .byte  127,63                              // jg            e47b <.literal16+0x6cb>
+  .byte  114,249                             // jb            e437 <.literal16+0x687>
+  .byte  127,63                              // jg            e47f <.literal16+0x6cf>
   .byte  3,0                                 // add           (%eax),%eax
   .byte  0,0                                 // add           %al,(%eax)
   .byte  3,0                                 // add           (%eax),%eax
@@ -82344,25 +82308,53 @@
   .byte  0,0                                 // add           %al,(%eax)
   .byte  3,0                                 // add           (%eax),%eax
   .byte  0,0                                 // add           %al,(%eax)
-  .byte  255,0                               // incl          (%eax)
-  .byte  255,0                               // incl          (%eax)
-  .byte  255,0                               // incl          (%eax)
-  .byte  255,0                               // incl          (%eax)
-  .byte  255,0                               // incl          (%eax)
-  .byte  255,0                               // incl          (%eax)
-  .byte  255,0                               // incl          (%eax)
-  .byte  255,0                               // incl          (%eax)
-  .byte  254,0                               // incb          (%eax)
-  .byte  254,0                               // incb          (%eax)
-  .byte  254,0                               // incb          (%eax)
-  .byte  254,0                               // incb          (%eax)
-  .byte  254,0                               // incb          (%eax)
-  .byte  254,0                               // incb          (%eax)
-  .byte  254,0                               // incb          (%eax)
-  .byte  254,0                               // incb          (%eax)
+  .byte  45,16,17,192,45                     // sub           $0x2dc01110,%eax
+  .byte  16,17                               // adc           %dl,(%ecx)
+  .byte  192,45,16,17,192,45,16              // shrb          $0x10,0x2dc01110
+  .byte  17,192                              // adc           %eax,%eax
+  .byte  18,120,57                           // adc           0x39(%eax),%bh
+  .byte  64                                  // inc           %eax
+  .byte  18,120,57                           // adc           0x39(%eax),%bh
+  .byte  64                                  // inc           %eax
+  .byte  18,120,57                           // adc           0x39(%eax),%bh
+  .byte  64                                  // inc           %eax
+  .byte  18,120,57                           // adc           0x39(%eax),%bh
+  .byte  64                                  // inc           %eax
+  .byte  32,148,90,62,32,148,90              // and           %dl,0x5a94203e(%edx,%ebx,2)
+  .byte  62,32,148,90,62,32,148,90           // and           %dl,%ds:0x5a94203e(%edx,%ebx,2)
+  .byte  62,4,157                            // ds            add $0x9d,%al
+  .byte  30                                  // push          %ds
+  .byte  62,4,157                            // ds            add $0x9d,%al
+  .byte  30                                  // push          %ds
+  .byte  62,4,157                            // ds            add $0x9d,%al
+  .byte  30                                  // push          %ds
+  .byte  62,4,157                            // ds            add $0x9d,%al
+  .byte  30                                  // push          %ds
+  .byte  62,0,24                             // add           %bl,%ds:(%eax)
+  .byte  161,57,0,24,161                     // mov           0xa1180039,%eax
+  .byte  57,0                                // cmp           %eax,(%eax)
+  .byte  24,161,57,0,24,161                  // sbb           %ah,-0x5ee7ffc7(%ecx)
+  .byte  57,255                              // cmp           %edi,%edi
+  .byte  0,255                               // add           %bh,%bh
+  .byte  0,255                               // add           %bh,%bh
+  .byte  0,255                               // add           %bh,%bh
+  .byte  0,255                               // add           %bh,%bh
+  .byte  0,255                               // add           %bh,%bh
+  .byte  0,255                               // add           %bh,%bh
+  .byte  0,255                               // add           %bh,%bh
+  .byte  0,254                               // add           %bh,%dh
+  .byte  0,254                               // add           %bh,%dh
+  .byte  0,254                               // add           %bh,%dh
+  .byte  0,254                               // add           %bh,%dh
+  .byte  0,254                               // add           %bh,%dh
+  .byte  0,254                               // add           %bh,%dh
+  .byte  0,254                               // add           %bh,%dh
+  .byte  0,254                               // add           %bh,%dh
+  .byte  0,0                                 // add           %al,(%eax)
+  .byte  128,0,128                           // addb          $0x80,(%eax)
   .byte  0,128,0,128,0,128                   // add           %al,-0x7fff8000(%eax)
   .byte  0,128,0,128,0,128                   // add           %al,-0x7fff8000(%eax)
-  .byte  0,128,0,128,254,1                   // add           %al,0x1fe8000(%eax)
+  .byte  254,1                               // incb          (%ecx)
   .byte  254,1                               // incb          (%ecx)
   .byte  254,1                               // incb          (%ecx)
   .byte  254,1                               // incb          (%ecx)
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index 1365ba0..a8842f0 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -5845,33 +5845,16 @@
   DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
   DB  255,224                             ; jmpq          *%rax
 
-PUBLIC _sk_gauss_a_to_rgba_hsw
-_sk_gauss_a_to_rgba_hsw LABEL PROC
-  DB  196,226,125,24,5,154,133,2,0        ; vbroadcastss  0x2859a(%rip),%ymm0        # 2e524 <_sk_srcover_bgra_8888_sse2_lowp+0x3dc>
-  DB  196,226,125,24,13,149,133,2,0       ; vbroadcastss  0x28595(%rip),%ymm1        # 2e528 <_sk_srcover_bgra_8888_sse2_lowp+0x3e0>
-  DB  196,226,101,168,200                 ; vfmadd213ps   %ymm0,%ymm3,%ymm1
-  DB  196,226,125,24,5,139,133,2,0        ; vbroadcastss  0x2858b(%rip),%ymm0        # 2e52c <_sk_srcover_bgra_8888_sse2_lowp+0x3e4>
-  DB  196,226,101,184,193                 ; vfmadd231ps   %ymm1,%ymm3,%ymm0
-  DB  196,226,125,24,13,129,133,2,0       ; vbroadcastss  0x28581(%rip),%ymm1        # 2e530 <_sk_srcover_bgra_8888_sse2_lowp+0x3e8>
-  DB  196,226,101,184,200                 ; vfmadd231ps   %ymm0,%ymm3,%ymm1
-  DB  196,226,125,24,5,119,133,2,0        ; vbroadcastss  0x28577(%rip),%ymm0        # 2e534 <_sk_srcover_bgra_8888_sse2_lowp+0x3ec>
-  DB  196,226,101,184,193                 ; vfmadd231ps   %ymm1,%ymm3,%ymm0
-  DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
-  DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
-  DB  197,252,40,216                      ; vmovaps       %ymm0,%ymm3
-  DB  255,224                             ; jmpq          *%rax
-
 PUBLIC _sk_gradient_hsw
 _sk_gradient_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  73,131,248,1                        ; cmp           $0x1,%r8
-  DB  15,134,220,0,0,0                    ; jbe           60bd <_sk_gradient_hsw+0xeb>
+  DB  15,134,220,0,0,0                    ; jbe           606c <_sk_gradient_hsw+0xeb>
   DB  76,139,72,72                        ; mov           0x48(%rax),%r9
   DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
   DB  65,186,1,0,0,0                      ; mov           $0x1,%r10d
-  DB  196,226,125,24,21,4,132,2,0         ; vbroadcastss  0x28404(%rip),%ymm2        # 2e3fc <_sk_srcover_bgra_8888_sse2_lowp+0x2b4>
+  DB  196,226,125,24,21,85,132,2,0        ; vbroadcastss  0x28455(%rip),%ymm2        # 2e3fc <_sk_srcover_bgra_8888_sse2_lowp+0x2b4>
   DB  196,65,53,239,201                   ; vpxor         %ymm9,%ymm9,%ymm9
   DB  196,130,125,24,28,145               ; vbroadcastss  (%r9,%r10,4),%ymm3
   DB  197,228,194,216,2                   ; vcmpleps      %ymm0,%ymm3,%ymm3
@@ -5879,10 +5862,10 @@
   DB  197,53,254,203                      ; vpaddd        %ymm3,%ymm9,%ymm9
   DB  73,255,194                          ; inc           %r10
   DB  77,57,208                           ; cmp           %r10,%r8
-  DB  117,227                             ; jne           5ffd <_sk_gradient_hsw+0x2b>
+  DB  117,227                             ; jne           5fac <_sk_gradient_hsw+0x2b>
   DB  76,139,72,8                         ; mov           0x8(%rax),%r9
   DB  73,131,248,8                        ; cmp           $0x8,%r8
-  DB  15,134,158,0,0,0                    ; jbe           60c6 <_sk_gradient_hsw+0xf4>
+  DB  15,134,158,0,0,0                    ; jbe           6075 <_sk_gradient_hsw+0xf4>
   DB  196,65,13,118,246                   ; vpcmpeqd      %ymm14,%ymm14,%ymm14
   DB  196,65,36,87,219                    ; vxorps        %ymm11,%ymm11,%ymm11
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
@@ -5914,7 +5897,7 @@
   DB  196,130,5,146,28,136                ; vgatherdps    %ymm15,(%r8,%ymm9,4),%ymm3
   DB  72,139,64,64                        ; mov           0x40(%rax),%rax
   DB  196,34,13,146,28,136                ; vgatherdps    %ymm14,(%rax,%ymm9,4),%ymm11
-  DB  235,77                              ; jmp           610a <_sk_gradient_hsw+0x138>
+  DB  235,77                              ; jmp           60b9 <_sk_gradient_hsw+0x138>
   DB  76,139,72,8                         ; mov           0x8(%rax),%r9
   DB  196,65,52,87,201                    ; vxorps        %ymm9,%ymm9,%ymm9
   DB  196,66,53,22,1                      ; vpermps       (%r9),%ymm9,%ymm8
@@ -5970,24 +5953,24 @@
   DB  196,65,52,95,226                    ; vmaxps        %ymm10,%ymm9,%ymm12
   DB  196,65,36,94,220                    ; vdivps        %ymm12,%ymm11,%ymm11
   DB  196,65,36,89,227                    ; vmulps        %ymm11,%ymm11,%ymm12
-  DB  196,98,125,24,45,147,131,2,0        ; vbroadcastss  0x28393(%rip),%ymm13        # 2e538 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
-  DB  196,98,125,24,53,142,131,2,0        ; vbroadcastss  0x2838e(%rip),%ymm14        # 2e53c <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
+  DB  196,98,125,24,45,208,131,2,0        ; vbroadcastss  0x283d0(%rip),%ymm13        # 2e524 <_sk_srcover_bgra_8888_sse2_lowp+0x3dc>
+  DB  196,98,125,24,53,203,131,2,0        ; vbroadcastss  0x283cb(%rip),%ymm14        # 2e528 <_sk_srcover_bgra_8888_sse2_lowp+0x3e0>
   DB  196,66,29,184,245                   ; vfmadd231ps   %ymm13,%ymm12,%ymm14
-  DB  196,98,125,24,45,132,131,2,0        ; vbroadcastss  0x28384(%rip),%ymm13        # 2e540 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
+  DB  196,98,125,24,45,193,131,2,0        ; vbroadcastss  0x283c1(%rip),%ymm13        # 2e52c <_sk_srcover_bgra_8888_sse2_lowp+0x3e4>
   DB  196,66,29,184,238                   ; vfmadd231ps   %ymm14,%ymm12,%ymm13
-  DB  196,98,125,24,53,122,131,2,0        ; vbroadcastss  0x2837a(%rip),%ymm14        # 2e544 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
+  DB  196,98,125,24,53,183,131,2,0        ; vbroadcastss  0x283b7(%rip),%ymm14        # 2e530 <_sk_srcover_bgra_8888_sse2_lowp+0x3e8>
   DB  196,66,29,184,245                   ; vfmadd231ps   %ymm13,%ymm12,%ymm14
   DB  196,65,36,89,222                    ; vmulps        %ymm14,%ymm11,%ymm11
   DB  196,65,52,194,202,1                 ; vcmpltps      %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,21,101,131,2,0        ; vbroadcastss  0x28365(%rip),%ymm10        # 2e548 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
+  DB  196,98,125,24,21,162,131,2,0        ; vbroadcastss  0x283a2(%rip),%ymm10        # 2e534 <_sk_srcover_bgra_8888_sse2_lowp+0x3ec>
   DB  196,65,44,92,211                    ; vsubps        %ymm11,%ymm10,%ymm10
   DB  196,67,37,74,202,144                ; vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   DB  196,193,124,194,192,1               ; vcmpltps      %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,21,247,129,2,0        ; vbroadcastss  0x281f7(%rip),%ymm10        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,21,72,130,2,0         ; vbroadcastss  0x28248(%rip),%ymm10        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  196,65,44,92,209                    ; vsubps        %ymm9,%ymm10,%ymm10
   DB  196,195,53,74,194,0                 ; vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   DB  196,65,116,194,200,1                ; vcmpltps      %ymm8,%ymm1,%ymm9
-  DB  196,98,125,24,21,225,129,2,0        ; vbroadcastss  0x281e1(%rip),%ymm10        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,98,125,24,21,50,130,2,0         ; vbroadcastss  0x28232(%rip),%ymm10        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,44,92,208                       ; vsubps        %ymm0,%ymm10,%ymm10
   DB  196,195,125,74,194,144              ; vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   DB  196,65,124,194,200,3                ; vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -6011,23 +5994,23 @@
   DB  197,50,89,80,76                     ; vmulss        0x4c(%rax),%xmm9,%xmm10
   DB  196,66,125,24,210                   ; vbroadcastss  %xmm10,%ymm10
   DB  197,44,88,208                       ; vaddps        %ymm0,%ymm10,%ymm10
-  DB  196,98,125,24,29,229,130,2,0        ; vbroadcastss  0x282e5(%rip),%ymm11        # 2e54c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  DB  196,98,125,24,29,34,131,2,0         ; vbroadcastss  0x28322(%rip),%ymm11        # 2e538 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   DB  196,65,44,89,211                    ; vmulps        %ymm11,%ymm10,%ymm10
   DB  197,116,89,217                      ; vmulps        %ymm1,%ymm1,%ymm11
   DB  196,98,125,184,216                  ; vfmadd231ps   %ymm0,%ymm0,%ymm11
   DB  196,193,50,89,193                   ; vmulss        %xmm9,%xmm9,%xmm0
   DB  196,226,125,24,192                  ; vbroadcastss  %xmm0,%ymm0
   DB  197,164,92,192                      ; vsubps        %ymm0,%ymm11,%ymm0
-  DB  196,98,125,24,13,196,130,2,0        ; vbroadcastss  0x282c4(%rip),%ymm9        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  DB  196,98,125,24,13,1,131,2,0          ; vbroadcastss  0x28301(%rip),%ymm9        # 2e53c <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  196,194,45,184,194                  ; vfmadd231ps   %ymm10,%ymm10,%ymm0
   DB  197,252,81,192                      ; vsqrtps       %ymm0,%ymm0
   DB  196,98,125,24,64,68                 ; vbroadcastss  0x44(%rax),%ymm8
-  DB  196,98,125,24,13,167,130,2,0        ; vbroadcastss  0x282a7(%rip),%ymm9        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  DB  196,98,125,24,13,228,130,2,0        ; vbroadcastss  0x282e4(%rip),%ymm9        # 2e540 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   DB  196,65,44,87,201                    ; vxorps        %ymm9,%ymm10,%ymm9
   DB  196,65,124,92,210                   ; vsubps        %ymm10,%ymm0,%ymm10
-  DB  196,98,125,24,29,52,129,2,0         ; vbroadcastss  0x28134(%rip),%ymm11        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,29,133,129,2,0        ; vbroadcastss  0x28185(%rip),%ymm11        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  196,65,60,89,195                    ; vmulps        %ymm11,%ymm8,%ymm8
   DB  196,65,44,89,208                    ; vmulps        %ymm8,%ymm10,%ymm10
   DB  197,180,92,192                      ; vsubps        %ymm0,%ymm9,%ymm0
@@ -6044,23 +6027,23 @@
   DB  197,50,89,80,76                     ; vmulss        0x4c(%rax),%xmm9,%xmm10
   DB  196,66,125,24,210                   ; vbroadcastss  %xmm10,%ymm10
   DB  197,44,88,208                       ; vaddps        %ymm0,%ymm10,%ymm10
-  DB  196,98,125,24,29,77,130,2,0         ; vbroadcastss  0x2824d(%rip),%ymm11        # 2e54c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  DB  196,98,125,24,29,138,130,2,0        ; vbroadcastss  0x2828a(%rip),%ymm11        # 2e538 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   DB  196,65,44,89,211                    ; vmulps        %ymm11,%ymm10,%ymm10
   DB  197,116,89,217                      ; vmulps        %ymm1,%ymm1,%ymm11
   DB  196,98,125,184,216                  ; vfmadd231ps   %ymm0,%ymm0,%ymm11
   DB  196,193,50,89,193                   ; vmulss        %xmm9,%xmm9,%xmm0
   DB  196,226,125,24,192                  ; vbroadcastss  %xmm0,%ymm0
   DB  197,164,92,192                      ; vsubps        %ymm0,%ymm11,%ymm0
-  DB  196,98,125,24,13,44,130,2,0         ; vbroadcastss  0x2822c(%rip),%ymm9        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  DB  196,98,125,24,13,105,130,2,0        ; vbroadcastss  0x28269(%rip),%ymm9        # 2e53c <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  196,194,45,184,194                  ; vfmadd231ps   %ymm10,%ymm10,%ymm0
   DB  197,252,81,192                      ; vsqrtps       %ymm0,%ymm0
   DB  196,98,125,24,64,68                 ; vbroadcastss  0x44(%rax),%ymm8
-  DB  196,98,125,24,13,15,130,2,0         ; vbroadcastss  0x2820f(%rip),%ymm9        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  DB  196,98,125,24,13,76,130,2,0         ; vbroadcastss  0x2824c(%rip),%ymm9        # 2e540 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   DB  196,65,44,87,201                    ; vxorps        %ymm9,%ymm10,%ymm9
   DB  196,65,124,92,210                   ; vsubps        %ymm10,%ymm0,%ymm10
-  DB  196,98,125,24,29,156,128,2,0        ; vbroadcastss  0x2809c(%rip),%ymm11        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,29,237,128,2,0        ; vbroadcastss  0x280ed(%rip),%ymm11        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  196,65,60,89,195                    ; vmulps        %ymm11,%ymm8,%ymm8
   DB  196,65,44,89,208                    ; vmulps        %ymm8,%ymm10,%ymm10
   DB  197,180,92,192                      ; vsubps        %ymm0,%ymm9,%ymm0
@@ -6076,14 +6059,14 @@
   DB  197,58,89,72,76                     ; vmulss        0x4c(%rax),%xmm8,%xmm9
   DB  196,66,125,24,201                   ; vbroadcastss  %xmm9,%ymm9
   DB  197,52,88,200                       ; vaddps        %ymm0,%ymm9,%ymm9
-  DB  196,98,125,24,21,187,129,2,0        ; vbroadcastss  0x281bb(%rip),%ymm10        # 2e54c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  DB  196,98,125,24,21,248,129,2,0        ; vbroadcastss  0x281f8(%rip),%ymm10        # 2e538 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   DB  196,65,52,89,202                    ; vmulps        %ymm10,%ymm9,%ymm9
   DB  197,116,89,209                      ; vmulps        %ymm1,%ymm1,%ymm10
   DB  196,98,125,184,208                  ; vfmadd231ps   %ymm0,%ymm0,%ymm10
   DB  196,193,58,89,192                   ; vmulss        %xmm8,%xmm8,%xmm0
   DB  196,226,125,24,192                  ; vbroadcastss  %xmm0,%ymm0
   DB  197,172,92,192                      ; vsubps        %ymm0,%ymm10,%ymm0
-  DB  196,98,125,24,5,158,129,2,0         ; vbroadcastss  0x2819e(%rip),%ymm8        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  DB  196,98,125,24,5,219,129,2,0         ; vbroadcastss  0x281db(%rip),%ymm8        # 2e540 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   DB  196,193,124,87,192                  ; vxorps        %ymm8,%ymm0,%ymm0
   DB  196,193,124,94,193                  ; vdivps        %ymm9,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -6120,7 +6103,7 @@
 PUBLIC _sk_save_xy_hsw
 _sk_save_xy_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,201,127,2,0         ; vbroadcastss  0x27fc9(%rip),%ymm8        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,5,26,128,2,0          ; vbroadcastss  0x2801a(%rip),%ymm8        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  196,65,124,88,200                   ; vaddps        %ymm8,%ymm0,%ymm9
   DB  196,67,125,8,209,1                  ; vroundps      $0x1,%ymm9,%ymm10
   DB  196,65,52,92,202                    ; vsubps        %ymm10,%ymm9,%ymm9
@@ -6150,9 +6133,9 @@
 PUBLIC _sk_bilinear_nx_hsw
 _sk_bilinear_nx_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,183,128,2,0        ; vbroadcastss  0x280b7(%rip),%ymm0        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  DB  196,226,125,24,5,244,128,2,0        ; vbroadcastss  0x280f4(%rip),%ymm0        # 2e544 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,74,127,2,0          ; vbroadcastss  0x27f4a(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,98,125,24,5,155,127,2,0         ; vbroadcastss  0x27f9b(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,60,92,128,128,0,0,0             ; vsubps        0x80(%rax),%ymm8,%ymm8
   DB  197,124,17,128,0,1,0,0              ; vmovups       %ymm8,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -6161,7 +6144,7 @@
 PUBLIC _sk_bilinear_px_hsw
 _sk_bilinear_px_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,39,127,2,0         ; vbroadcastss  0x27f27(%rip),%ymm0        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,226,125,24,5,120,127,2,0        ; vbroadcastss  0x27f78(%rip),%ymm0        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,128,128,0,0,0            ; vmovups       0x80(%rax),%ymm8
   DB  197,124,17,128,0,1,0,0              ; vmovups       %ymm8,0x100(%rax)
@@ -6171,9 +6154,9 @@
 PUBLIC _sk_bilinear_ny_hsw
 _sk_bilinear_ny_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,104,128,2,0       ; vbroadcastss  0x28068(%rip),%ymm1        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  DB  196,226,125,24,13,165,128,2,0       ; vbroadcastss  0x280a5(%rip),%ymm1        # 2e544 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   DB  197,244,88,72,64                    ; vaddps        0x40(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,250,126,2,0         ; vbroadcastss  0x27efa(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,98,125,24,5,75,127,2,0          ; vbroadcastss  0x27f4b(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,60,92,128,192,0,0,0             ; vsubps        0xc0(%rax),%ymm8,%ymm8
   DB  197,124,17,128,64,1,0,0             ; vmovups       %ymm8,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -6182,7 +6165,7 @@
 PUBLIC _sk_bilinear_py_hsw
 _sk_bilinear_py_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,215,126,2,0       ; vbroadcastss  0x27ed7(%rip),%ymm1        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,226,125,24,13,40,127,2,0        ; vbroadcastss  0x27f28(%rip),%ymm1        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  197,244,88,72,64                    ; vaddps        0x40(%rax),%ymm1,%ymm1
   DB  197,124,16,128,192,0,0,0            ; vmovups       0xc0(%rax),%ymm8
   DB  197,124,17,128,64,1,0,0             ; vmovups       %ymm8,0x140(%rax)
@@ -6192,13 +6175,13 @@
 PUBLIC _sk_bicubic_n3x_hsw
 _sk_bicubic_n3x_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,27,128,2,0         ; vbroadcastss  0x2801b(%rip),%ymm0        # 2e55c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
+  DB  196,226,125,24,5,88,128,2,0         ; vbroadcastss  0x28058(%rip),%ymm0        # 2e548 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,170,126,2,0         ; vbroadcastss  0x27eaa(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,98,125,24,5,251,126,2,0         ; vbroadcastss  0x27efb(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,60,92,128,128,0,0,0             ; vsubps        0x80(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,8,127,2,0          ; vbroadcastss  0x27f08(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
-  DB  196,98,125,24,29,243,127,2,0        ; vbroadcastss  0x27ff3(%rip),%ymm11        # 2e560 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  DB  196,98,125,24,21,89,127,2,0         ; vbroadcastss  0x27f59(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  DB  196,98,125,24,29,48,128,2,0         ; vbroadcastss  0x28030(%rip),%ymm11        # 2e54c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   DB  196,66,61,168,218                   ; vfmadd213ps   %ymm10,%ymm8,%ymm11
   DB  196,65,36,89,193                    ; vmulps        %ymm9,%ymm11,%ymm8
   DB  197,124,17,128,0,1,0,0              ; vmovups       %ymm8,0x100(%rax)
@@ -6208,16 +6191,16 @@
 PUBLIC _sk_bicubic_n1x_hsw
 _sk_bicubic_n1x_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,202,127,2,0        ; vbroadcastss  0x27fca(%rip),%ymm0        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  DB  196,226,125,24,5,7,128,2,0          ; vbroadcastss  0x28007(%rip),%ymm0        # 2e544 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,93,126,2,0          ; vbroadcastss  0x27e5d(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,98,125,24,5,174,126,2,0         ; vbroadcastss  0x27eae(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,60,92,128,128,0,0,0             ; vsubps        0x80(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,184,127,2,0        ; vbroadcastss  0x27fb8(%rip),%ymm9        # 2e564 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
-  DB  196,98,125,24,21,179,127,2,0        ; vbroadcastss  0x27fb3(%rip),%ymm10        # 2e568 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  DB  196,98,125,24,13,245,127,2,0        ; vbroadcastss  0x27ff5(%rip),%ymm9        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  DB  196,98,125,24,21,240,127,2,0        ; vbroadcastss  0x27ff0(%rip),%ymm10        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   DB  196,66,61,168,209                   ; vfmadd213ps   %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,13,49,126,2,0         ; vbroadcastss  0x27e31(%rip),%ymm9        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,13,130,126,2,0        ; vbroadcastss  0x27e82(%rip),%ymm9        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  196,66,61,184,202                   ; vfmadd231ps   %ymm10,%ymm8,%ymm9
-  DB  196,98,125,24,21,155,127,2,0        ; vbroadcastss  0x27f9b(%rip),%ymm10        # 2e56c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  DB  196,98,125,24,21,216,127,2,0        ; vbroadcastss  0x27fd8(%rip),%ymm10        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   DB  196,66,61,184,209                   ; vfmadd231ps   %ymm9,%ymm8,%ymm10
   DB  197,124,17,144,0,1,0,0              ; vmovups       %ymm10,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -6226,14 +6209,14 @@
 PUBLIC _sk_bicubic_p1x_hsw
 _sk_bicubic_p1x_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,7,126,2,0           ; vbroadcastss  0x27e07(%rip),%ymm8        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,5,88,126,2,0          ; vbroadcastss  0x27e58(%rip),%ymm8        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  197,188,88,0                        ; vaddps        (%rax),%ymm8,%ymm0
   DB  197,124,16,136,128,0,0,0            ; vmovups       0x80(%rax),%ymm9
-  DB  196,98,125,24,21,98,127,2,0         ; vbroadcastss  0x27f62(%rip),%ymm10        # 2e564 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
-  DB  196,98,125,24,29,93,127,2,0         ; vbroadcastss  0x27f5d(%rip),%ymm11        # 2e568 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  DB  196,98,125,24,21,159,127,2,0        ; vbroadcastss  0x27f9f(%rip),%ymm10        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  DB  196,98,125,24,29,154,127,2,0        ; vbroadcastss  0x27f9a(%rip),%ymm11        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   DB  196,66,53,168,218                   ; vfmadd213ps   %ymm10,%ymm9,%ymm11
   DB  196,66,53,168,216                   ; vfmadd213ps   %ymm8,%ymm9,%ymm11
-  DB  196,98,125,24,5,78,127,2,0          ; vbroadcastss  0x27f4e(%rip),%ymm8        # 2e56c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  DB  196,98,125,24,5,139,127,2,0         ; vbroadcastss  0x27f8b(%rip),%ymm8        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   DB  196,66,53,184,195                   ; vfmadd231ps   %ymm11,%ymm9,%ymm8
   DB  197,124,17,128,0,1,0,0              ; vmovups       %ymm8,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -6242,12 +6225,12 @@
 PUBLIC _sk_bicubic_p3x_hsw
 _sk_bicubic_p3x_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,42,127,2,0         ; vbroadcastss  0x27f2a(%rip),%ymm0        # 2e564 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  DB  196,226,125,24,5,103,127,2,0        ; vbroadcastss  0x27f67(%rip),%ymm0        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,128,128,0,0,0            ; vmovups       0x80(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,24,126,2,0         ; vbroadcastss  0x27e18(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
-  DB  196,98,125,24,29,3,127,2,0          ; vbroadcastss  0x27f03(%rip),%ymm11        # 2e560 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  DB  196,98,125,24,21,105,126,2,0        ; vbroadcastss  0x27e69(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  DB  196,98,125,24,29,64,127,2,0         ; vbroadcastss  0x27f40(%rip),%ymm11        # 2e54c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   DB  196,66,61,168,218                   ; vfmadd213ps   %ymm10,%ymm8,%ymm11
   DB  196,65,52,89,195                    ; vmulps        %ymm11,%ymm9,%ymm8
   DB  197,124,17,128,0,1,0,0              ; vmovups       %ymm8,0x100(%rax)
@@ -6257,13 +6240,13 @@
 PUBLIC _sk_bicubic_n3y_hsw
 _sk_bicubic_n3y_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,222,126,2,0       ; vbroadcastss  0x27ede(%rip),%ymm1        # 2e55c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
+  DB  196,226,125,24,13,27,127,2,0        ; vbroadcastss  0x27f1b(%rip),%ymm1        # 2e548 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
   DB  197,244,88,72,64                    ; vaddps        0x40(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,108,125,2,0         ; vbroadcastss  0x27d6c(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,98,125,24,5,189,125,2,0         ; vbroadcastss  0x27dbd(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,60,92,128,192,0,0,0             ; vsubps        0xc0(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,202,125,2,0        ; vbroadcastss  0x27dca(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
-  DB  196,98,125,24,29,181,126,2,0        ; vbroadcastss  0x27eb5(%rip),%ymm11        # 2e560 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  DB  196,98,125,24,21,27,126,2,0         ; vbroadcastss  0x27e1b(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  DB  196,98,125,24,29,242,126,2,0        ; vbroadcastss  0x27ef2(%rip),%ymm11        # 2e54c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   DB  196,66,61,168,218                   ; vfmadd213ps   %ymm10,%ymm8,%ymm11
   DB  196,65,36,89,193                    ; vmulps        %ymm9,%ymm11,%ymm8
   DB  197,124,17,128,64,1,0,0             ; vmovups       %ymm8,0x140(%rax)
@@ -6273,16 +6256,16 @@
 PUBLIC _sk_bicubic_n1y_hsw
 _sk_bicubic_n1y_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,140,126,2,0       ; vbroadcastss  0x27e8c(%rip),%ymm1        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  DB  196,226,125,24,13,201,126,2,0       ; vbroadcastss  0x27ec9(%rip),%ymm1        # 2e544 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   DB  197,244,88,72,64                    ; vaddps        0x40(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,30,125,2,0          ; vbroadcastss  0x27d1e(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,98,125,24,5,111,125,2,0         ; vbroadcastss  0x27d6f(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,60,92,128,192,0,0,0             ; vsubps        0xc0(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,121,126,2,0        ; vbroadcastss  0x27e79(%rip),%ymm9        # 2e564 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
-  DB  196,98,125,24,21,116,126,2,0        ; vbroadcastss  0x27e74(%rip),%ymm10        # 2e568 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  DB  196,98,125,24,13,182,126,2,0        ; vbroadcastss  0x27eb6(%rip),%ymm9        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  DB  196,98,125,24,21,177,126,2,0        ; vbroadcastss  0x27eb1(%rip),%ymm10        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   DB  196,66,61,168,209                   ; vfmadd213ps   %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,13,242,124,2,0        ; vbroadcastss  0x27cf2(%rip),%ymm9        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,13,67,125,2,0         ; vbroadcastss  0x27d43(%rip),%ymm9        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  196,66,61,184,202                   ; vfmadd231ps   %ymm10,%ymm8,%ymm9
-  DB  196,98,125,24,21,92,126,2,0         ; vbroadcastss  0x27e5c(%rip),%ymm10        # 2e56c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  DB  196,98,125,24,21,153,126,2,0        ; vbroadcastss  0x27e99(%rip),%ymm10        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   DB  196,66,61,184,209                   ; vfmadd231ps   %ymm9,%ymm8,%ymm10
   DB  197,124,17,144,64,1,0,0             ; vmovups       %ymm10,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -6291,14 +6274,14 @@
 PUBLIC _sk_bicubic_p1y_hsw
 _sk_bicubic_p1y_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,200,124,2,0         ; vbroadcastss  0x27cc8(%rip),%ymm8        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,5,25,125,2,0          ; vbroadcastss  0x27d19(%rip),%ymm8        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  197,188,88,72,64                    ; vaddps        0x40(%rax),%ymm8,%ymm1
   DB  197,124,16,136,192,0,0,0            ; vmovups       0xc0(%rax),%ymm9
-  DB  196,98,125,24,21,34,126,2,0         ; vbroadcastss  0x27e22(%rip),%ymm10        # 2e564 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
-  DB  196,98,125,24,29,29,126,2,0         ; vbroadcastss  0x27e1d(%rip),%ymm11        # 2e568 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  DB  196,98,125,24,21,95,126,2,0         ; vbroadcastss  0x27e5f(%rip),%ymm10        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  DB  196,98,125,24,29,90,126,2,0         ; vbroadcastss  0x27e5a(%rip),%ymm11        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   DB  196,66,53,168,218                   ; vfmadd213ps   %ymm10,%ymm9,%ymm11
   DB  196,66,53,168,216                   ; vfmadd213ps   %ymm8,%ymm9,%ymm11
-  DB  196,98,125,24,5,14,126,2,0          ; vbroadcastss  0x27e0e(%rip),%ymm8        # 2e56c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  DB  196,98,125,24,5,75,126,2,0          ; vbroadcastss  0x27e4b(%rip),%ymm8        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   DB  196,66,53,184,195                   ; vfmadd231ps   %ymm11,%ymm9,%ymm8
   DB  197,124,17,128,64,1,0,0             ; vmovups       %ymm8,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -6307,12 +6290,12 @@
 PUBLIC _sk_bicubic_p3y_hsw
 _sk_bicubic_p3y_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,234,125,2,0       ; vbroadcastss  0x27dea(%rip),%ymm1        # 2e564 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  DB  196,226,125,24,13,39,126,2,0        ; vbroadcastss  0x27e27(%rip),%ymm1        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   DB  197,244,88,72,64                    ; vaddps        0x40(%rax),%ymm1,%ymm1
   DB  197,124,16,128,192,0,0,0            ; vmovups       0xc0(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,215,124,2,0        ; vbroadcastss  0x27cd7(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
-  DB  196,98,125,24,29,194,125,2,0        ; vbroadcastss  0x27dc2(%rip),%ymm11        # 2e560 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  DB  196,98,125,24,21,40,125,2,0         ; vbroadcastss  0x27d28(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  DB  196,98,125,24,29,255,125,2,0        ; vbroadcastss  0x27dff(%rip),%ymm11        # 2e54c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   DB  196,66,61,168,218                   ; vfmadd213ps   %ymm10,%ymm8,%ymm11
   DB  196,65,52,89,195                    ; vmulps        %ymm11,%ymm9,%ymm8
   DB  197,124,17,128,64,1,0,0             ; vmovups       %ymm8,0x140(%rax)
@@ -6440,7 +6423,7 @@
   DB  196,98,93,64,218                    ; vpmulld       %ymm2,%ymm4,%ymm11
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  197,165,254,193                     ; vpaddd        %ymm1,%ymm11,%ymm0
-  DB  196,98,125,88,61,145,123,2,0        ; vpbroadcastd  0x27b91(%rip),%ymm15        # 2e574 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  DB  196,98,125,88,61,206,123,2,0        ; vpbroadcastd  0x27bce(%rip),%ymm15        # 2e560 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
   DB  196,194,125,64,199                  ; vpmulld       %ymm15,%ymm0,%ymm0
   DB  197,213,239,237                     ; vpxor         %ymm5,%ymm5,%ymm5
   DB  197,237,118,210                     ; vpcmpeqd      %ymm2,%ymm2,%ymm2
@@ -6452,13 +6435,13 @@
   DB  197,213,118,237                     ; vpcmpeqd      %ymm5,%ymm5,%ymm5
   DB  196,226,85,146,60,144               ; vgatherdps    %ymm5,(%rax,%ymm2,4),%ymm7
   DB  197,252,17,188,36,128,2,0,0         ; vmovups       %ymm7,0x280(%rsp)
-  DB  196,226,125,88,61,219,121,2,0       ; vpbroadcastd  0x279db(%rip),%ymm7        # 2e404 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
+  DB  196,226,125,88,61,44,122,2,0        ; vpbroadcastd  0x27a2c(%rip),%ymm7        # 2e404 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
   DB  197,253,254,199                     ; vpaddd        %ymm7,%ymm0,%ymm0
   DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
   DB  197,213,118,237                     ; vpcmpeqd      %ymm5,%ymm5,%ymm5
   DB  196,226,85,146,20,128               ; vgatherdps    %ymm5,(%rax,%ymm0,4),%ymm2
   DB  197,252,17,148,36,160,1,0,0         ; vmovups       %ymm2,0x1a0(%rsp)
-  DB  196,226,125,24,5,35,123,2,0         ; vbroadcastss  0x27b23(%rip),%ymm0        # 2e570 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  DB  196,226,125,24,5,96,123,2,0         ; vbroadcastss  0x27b60(%rip),%ymm0        # 2e55c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
   DB  197,172,88,232                      ; vaddps        %ymm0,%ymm10,%ymm5
   DB  197,254,91,237                      ; vcvttps2dq    %ymm5,%ymm5
   DB  196,226,93,64,213                   ; vpmulld       %ymm5,%ymm4,%ymm2
@@ -6718,7 +6701,7 @@
   DB  197,254,127,132,36,128,1,0,0        ; vmovdqu       %ymm0,0x180(%rsp)
   DB  196,98,109,64,200                   ; vpmulld       %ymm0,%ymm2,%ymm9
   DB  197,181,254,199                     ; vpaddd        %ymm7,%ymm9,%ymm0
-  DB  196,98,125,88,21,165,117,2,0        ; vpbroadcastd  0x275a5(%rip),%ymm10        # 2e574 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  DB  196,98,125,88,21,226,117,2,0        ; vpbroadcastd  0x275e2(%rip),%ymm10        # 2e560 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
   DB  196,194,125,64,194                  ; vpmulld       %ymm10,%ymm0,%ymm0
   DB  197,213,118,237                     ; vpcmpeqd      %ymm5,%ymm5,%ymm5
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
@@ -6730,13 +6713,13 @@
   DB  196,65,36,87,219                    ; vxorps        %ymm11,%ymm11,%ymm11
   DB  196,98,61,146,28,168                ; vgatherdps    %ymm8,(%rax,%ymm5,4),%ymm11
   DB  197,124,17,156,36,64,1,0,0          ; vmovups       %ymm11,0x140(%rsp)
-  DB  196,98,125,88,29,236,115,2,0        ; vpbroadcastd  0x273ec(%rip),%ymm11        # 2e404 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
+  DB  196,98,125,88,29,61,116,2,0         ; vpbroadcastd  0x2743d(%rip),%ymm11        # 2e404 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
   DB  196,193,125,254,195                 ; vpaddd        %ymm11,%ymm0,%ymm0
   DB  196,65,61,118,192                   ; vpcmpeqd      %ymm8,%ymm8,%ymm8
   DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
   DB  196,226,61,146,44,128               ; vgatherdps    %ymm8,(%rax,%ymm0,4),%ymm5
   DB  197,252,17,172,36,160,0,0,0         ; vmovups       %ymm5,0xa0(%rsp)
-  DB  196,226,125,24,5,50,117,2,0         ; vbroadcastss  0x27532(%rip),%ymm0        # 2e570 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  DB  196,226,125,24,5,111,117,2,0        ; vbroadcastss  0x2756f(%rip),%ymm0        # 2e55c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
   DB  197,116,88,192                      ; vaddps        %ymm0,%ymm1,%ymm8
   DB  196,65,126,91,192                   ; vcvttps2dq    %ymm8,%ymm8
   DB  196,194,109,64,232                  ; vpmulld       %ymm8,%ymm2,%ymm5
@@ -7169,13 +7152,30 @@
   DB  196,193,100,92,210                  ; vsubps        %ymm10,%ymm3,%ymm2
   DB  196,194,77,168,210                  ; vfmadd213ps   %ymm10,%ymm6,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,222,105,2,0       ; vbroadcastss  0x269de(%rip),%ymm3        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,226,125,24,29,47,106,2,0        ; vbroadcastss  0x26a2f(%rip),%ymm3        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,252,16,164,36,224,6,0,0         ; vmovups       0x6e0(%rsp),%ymm4
   DB  197,252,16,172,36,0,7,0,0           ; vmovups       0x700(%rsp),%ymm5
   DB  197,252,16,180,36,32,7,0,0          ; vmovups       0x720(%rsp),%ymm6
   DB  197,252,16,188,36,64,7,0,0          ; vmovups       0x740(%rsp),%ymm7
   DB  72,129,196,120,7,0,0                ; add           $0x778,%rsp
   DB  255,224                             ; jmpq          *%rax
+
+PUBLIC _sk_gauss_a_to_rgba_hsw
+_sk_gauss_a_to_rgba_hsw LABEL PROC
+  DB  196,226,125,24,5,101,107,2,0        ; vbroadcastss  0x26b65(%rip),%ymm0        # 2e564 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  DB  196,226,125,24,13,96,107,2,0        ; vbroadcastss  0x26b60(%rip),%ymm1        # 2e568 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  DB  196,226,101,168,200                 ; vfmadd213ps   %ymm0,%ymm3,%ymm1
+  DB  196,226,125,24,5,86,107,2,0         ; vbroadcastss  0x26b56(%rip),%ymm0        # 2e56c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  DB  196,226,101,184,193                 ; vfmadd231ps   %ymm1,%ymm3,%ymm0
+  DB  196,226,125,24,13,76,107,2,0        ; vbroadcastss  0x26b4c(%rip),%ymm1        # 2e570 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  DB  196,226,101,184,200                 ; vfmadd231ps   %ymm0,%ymm3,%ymm1
+  DB  196,226,125,24,5,66,107,2,0         ; vbroadcastss  0x26b42(%rip),%ymm0        # 2e574 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  DB  196,226,101,184,193                 ; vfmadd231ps   %ymm1,%ymm3,%ymm0
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
+  DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
+  DB  197,252,40,216                      ; vmovaps       %ymm0,%ymm3
+  DB  255,224                             ; jmpq          *%rax
   DB  144                                 ; nop
 
 PUBLIC _sk_start_pipeline_avx
@@ -13294,7 +13294,7 @@
   DB  197,252,17,108,36,64                ; vmovups       %ymm5,0x40(%rsp)
   DB  197,252,17,100,36,32                ; vmovups       %ymm4,0x20(%rsp)
   DB  197,252,40,225                      ; vmovaps       %ymm1,%ymm4
-  DB  196,98,125,24,5,197,3,2,0           ; vbroadcastss  0x203c5(%rip),%ymm8        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  DB  196,98,125,24,5,177,3,2,0           ; vbroadcastss  0x203b1(%rip),%ymm8        # 2e540 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   DB  196,65,124,84,216                   ; vandps        %ymm8,%ymm0,%ymm11
   DB  196,98,125,24,21,227,3,2,0          ; vbroadcastss  0x203e3(%rip),%ymm10        # 2e580 <_sk_srcover_bgra_8888_sse2_lowp+0x438>
   DB  196,65,124,84,226                   ; vandps        %ymm10,%ymm0,%ymm12
@@ -14478,27 +14478,6 @@
   DB  93                                  ; pop           %rbp
   DB  255,224                             ; jmpq          *%rax
 
-PUBLIC _sk_gauss_a_to_rgba_avx
-_sk_gauss_a_to_rgba_avx LABEL PROC
-  DB  196,226,125,24,5,203,238,1,0        ; vbroadcastss  0x1eecb(%rip),%ymm0        # 2e528 <_sk_srcover_bgra_8888_sse2_lowp+0x3e0>
-  DB  197,228,89,192                      ; vmulps        %ymm0,%ymm3,%ymm0
-  DB  196,226,125,24,13,186,238,1,0       ; vbroadcastss  0x1eeba(%rip),%ymm1        # 2e524 <_sk_srcover_bgra_8888_sse2_lowp+0x3dc>
-  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
-  DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
-  DB  196,226,125,24,13,177,238,1,0       ; vbroadcastss  0x1eeb1(%rip),%ymm1        # 2e52c <_sk_srcover_bgra_8888_sse2_lowp+0x3e4>
-  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
-  DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
-  DB  196,226,125,24,13,164,238,1,0       ; vbroadcastss  0x1eea4(%rip),%ymm1        # 2e530 <_sk_srcover_bgra_8888_sse2_lowp+0x3e8>
-  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
-  DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
-  DB  196,226,125,24,13,151,238,1,0       ; vbroadcastss  0x1ee97(%rip),%ymm1        # 2e534 <_sk_srcover_bgra_8888_sse2_lowp+0x3ec>
-  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
-  DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
-  DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
-  DB  197,252,40,216                      ; vmovaps       %ymm0,%ymm3
-  DB  255,224                             ; jmpq          *%rax
-
 PUBLIC _sk_gradient_avx
 _sk_gradient_avx LABEL PROC
   DB  65,87                               ; push          %r15
@@ -14512,12 +14491,12 @@
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
   DB  73,131,248,2                        ; cmp           $0x2,%r8
-  DB  114,81                              ; jb            f723 <_sk_gradient_avx+0x72>
+  DB  114,81                              ; jb            f6c6 <_sk_gradient_avx+0x72>
   DB  72,139,88,72                        ; mov           0x48(%rax),%rbx
   DB  73,255,200                          ; dec           %r8
   DB  72,131,195,4                        ; add           $0x4,%rbx
   DB  196,65,52,87,201                    ; vxorps        %ymm9,%ymm9,%ymm9
-  DB  196,98,125,24,21,17,237,1,0         ; vbroadcastss  0x1ed11(%rip),%ymm10        # 2e3fc <_sk_srcover_bgra_8888_sse2_lowp+0x2b4>
+  DB  196,98,125,24,21,110,237,1,0        ; vbroadcastss  0x1ed6e(%rip),%ymm10        # 2e3fc <_sk_srcover_bgra_8888_sse2_lowp+0x2b4>
   DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
   DB  196,98,125,24,3                     ; vbroadcastss  (%rbx),%ymm8
   DB  197,60,194,192,2                    ; vcmpleps      %ymm0,%ymm8,%ymm8
@@ -14529,7 +14508,7 @@
   DB  196,227,117,24,202,1                ; vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   DB  72,131,195,4                        ; add           $0x4,%rbx
   DB  73,255,200                          ; dec           %r8
-  DB  117,204                             ; jne           f6ef <_sk_gradient_avx+0x3e>
+  DB  117,204                             ; jne           f692 <_sk_gradient_avx+0x3e>
   DB  196,195,249,22,200,1                ; vpextrq       $0x1,%xmm1,%r8
   DB  69,137,193                          ; mov           %r8d,%r9d
   DB  73,193,232,32                       ; shr           $0x20,%r8
@@ -14708,27 +14687,27 @@
   DB  196,65,52,95,226                    ; vmaxps        %ymm10,%ymm9,%ymm12
   DB  196,65,36,94,220                    ; vdivps        %ymm12,%ymm11,%ymm11
   DB  196,65,36,89,227                    ; vmulps        %ymm11,%ymm11,%ymm12
-  DB  196,98,125,24,45,60,234,1,0         ; vbroadcastss  0x1ea3c(%rip),%ymm13        # 2e538 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
+  DB  196,98,125,24,45,133,234,1,0        ; vbroadcastss  0x1ea85(%rip),%ymm13        # 2e524 <_sk_srcover_bgra_8888_sse2_lowp+0x3dc>
   DB  196,65,28,89,237                    ; vmulps        %ymm13,%ymm12,%ymm13
-  DB  196,98,125,24,53,50,234,1,0         ; vbroadcastss  0x1ea32(%rip),%ymm14        # 2e53c <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
+  DB  196,98,125,24,53,123,234,1,0        ; vbroadcastss  0x1ea7b(%rip),%ymm14        # 2e528 <_sk_srcover_bgra_8888_sse2_lowp+0x3e0>
   DB  196,65,20,88,238                    ; vaddps        %ymm14,%ymm13,%ymm13
   DB  196,65,28,89,237                    ; vmulps        %ymm13,%ymm12,%ymm13
-  DB  196,98,125,24,53,35,234,1,0         ; vbroadcastss  0x1ea23(%rip),%ymm14        # 2e540 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
+  DB  196,98,125,24,53,108,234,1,0        ; vbroadcastss  0x1ea6c(%rip),%ymm14        # 2e52c <_sk_srcover_bgra_8888_sse2_lowp+0x3e4>
   DB  196,65,20,88,238                    ; vaddps        %ymm14,%ymm13,%ymm13
   DB  196,65,28,89,229                    ; vmulps        %ymm13,%ymm12,%ymm12
-  DB  196,98,125,24,45,20,234,1,0         ; vbroadcastss  0x1ea14(%rip),%ymm13        # 2e544 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
+  DB  196,98,125,24,45,93,234,1,0         ; vbroadcastss  0x1ea5d(%rip),%ymm13        # 2e530 <_sk_srcover_bgra_8888_sse2_lowp+0x3e8>
   DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
   DB  196,65,52,194,202,1                 ; vcmpltps      %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,21,255,233,1,0        ; vbroadcastss  0x1e9ff(%rip),%ymm10        # 2e548 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
+  DB  196,98,125,24,21,72,234,1,0         ; vbroadcastss  0x1ea48(%rip),%ymm10        # 2e534 <_sk_srcover_bgra_8888_sse2_lowp+0x3ec>
   DB  196,65,44,92,211                    ; vsubps        %ymm11,%ymm10,%ymm10
   DB  196,67,37,74,202,144                ; vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   DB  196,193,124,194,192,1               ; vcmpltps      %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,21,145,232,1,0        ; vbroadcastss  0x1e891(%rip),%ymm10        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,21,238,232,1,0        ; vbroadcastss  0x1e8ee(%rip),%ymm10        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  196,65,44,92,209                    ; vsubps        %ymm9,%ymm10,%ymm10
   DB  196,195,53,74,194,0                 ; vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   DB  196,65,116,194,200,1                ; vcmpltps      %ymm8,%ymm1,%ymm9
-  DB  196,98,125,24,21,123,232,1,0        ; vbroadcastss  0x1e87b(%rip),%ymm10        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,98,125,24,21,216,232,1,0        ; vbroadcastss  0x1e8d8(%rip),%ymm10        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,44,92,208                       ; vsubps        %ymm0,%ymm10,%ymm10
   DB  196,195,125,74,194,144              ; vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   DB  196,65,124,194,200,3                ; vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -14754,7 +14733,7 @@
   DB  196,67,121,4,210,0                  ; vpermilps     $0x0,%xmm10,%xmm10
   DB  196,67,45,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm10,%ymm10
   DB  197,44,88,208                       ; vaddps        %ymm0,%ymm10,%ymm10
-  DB  196,98,125,24,29,117,233,1,0        ; vbroadcastss  0x1e975(%rip),%ymm11        # 2e54c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  DB  196,98,125,24,29,190,233,1,0        ; vbroadcastss  0x1e9be(%rip),%ymm11        # 2e538 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   DB  196,65,44,89,211                    ; vmulps        %ymm11,%ymm10,%ymm10
   DB  197,252,89,192                      ; vmulps        %ymm0,%ymm0,%ymm0
   DB  197,116,89,217                      ; vmulps        %ymm1,%ymm1,%ymm11
@@ -14763,17 +14742,17 @@
   DB  196,227,121,4,192,0                 ; vpermilps     $0x0,%xmm0,%xmm0
   DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
   DB  197,164,92,192                      ; vsubps        %ymm0,%ymm11,%ymm0
-  DB  196,98,125,24,13,73,233,1,0         ; vbroadcastss  0x1e949(%rip),%ymm9        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  DB  196,98,125,24,13,146,233,1,0        ; vbroadcastss  0x1e992(%rip),%ymm9        # 2e53c <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  196,65,44,89,194                    ; vmulps        %ymm10,%ymm10,%ymm8
   DB  196,193,124,88,192                  ; vaddps        %ymm8,%ymm0,%ymm0
   DB  197,252,81,192                      ; vsqrtps       %ymm0,%ymm0
   DB  196,98,125,24,64,68                 ; vbroadcastss  0x44(%rax),%ymm8
-  DB  196,98,125,24,13,39,233,1,0         ; vbroadcastss  0x1e927(%rip),%ymm9        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  DB  196,98,125,24,13,112,233,1,0        ; vbroadcastss  0x1e970(%rip),%ymm9        # 2e540 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   DB  196,65,44,87,201                    ; vxorps        %ymm9,%ymm10,%ymm9
   DB  196,65,124,92,210                   ; vsubps        %ymm10,%ymm0,%ymm10
-  DB  196,98,125,24,29,180,231,1,0        ; vbroadcastss  0x1e7b4(%rip),%ymm11        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,29,17,232,1,0         ; vbroadcastss  0x1e811(%rip),%ymm11        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  196,65,60,89,195                    ; vmulps        %ymm11,%ymm8,%ymm8
   DB  196,65,60,89,210                    ; vmulps        %ymm10,%ymm8,%ymm10
   DB  197,180,92,192                      ; vsubps        %ymm0,%ymm9,%ymm0
@@ -14791,7 +14770,7 @@
   DB  196,67,121,4,210,0                  ; vpermilps     $0x0,%xmm10,%xmm10
   DB  196,67,45,24,210,1                  ; vinsertf128   $0x1,%xmm10,%ymm10,%ymm10
   DB  197,44,88,208                       ; vaddps        %ymm0,%ymm10,%ymm10
-  DB  196,98,125,24,29,199,232,1,0        ; vbroadcastss  0x1e8c7(%rip),%ymm11        # 2e54c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  DB  196,98,125,24,29,16,233,1,0         ; vbroadcastss  0x1e910(%rip),%ymm11        # 2e538 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   DB  196,65,44,89,211                    ; vmulps        %ymm11,%ymm10,%ymm10
   DB  197,252,89,192                      ; vmulps        %ymm0,%ymm0,%ymm0
   DB  197,116,89,217                      ; vmulps        %ymm1,%ymm1,%ymm11
@@ -14800,17 +14779,17 @@
   DB  196,227,121,4,192,0                 ; vpermilps     $0x0,%xmm0,%xmm0
   DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
   DB  197,164,92,192                      ; vsubps        %ymm0,%ymm11,%ymm0
-  DB  196,98,125,24,13,155,232,1,0        ; vbroadcastss  0x1e89b(%rip),%ymm9        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
+  DB  196,98,125,24,13,228,232,1,0        ; vbroadcastss  0x1e8e4(%rip),%ymm9        # 2e53c <_sk_srcover_bgra_8888_sse2_lowp+0x3f4>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  196,65,44,89,194                    ; vmulps        %ymm10,%ymm10,%ymm8
   DB  196,193,124,88,192                  ; vaddps        %ymm8,%ymm0,%ymm0
   DB  197,252,81,192                      ; vsqrtps       %ymm0,%ymm0
   DB  196,98,125,24,64,68                 ; vbroadcastss  0x44(%rax),%ymm8
-  DB  196,98,125,24,13,121,232,1,0        ; vbroadcastss  0x1e879(%rip),%ymm9        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  DB  196,98,125,24,13,194,232,1,0        ; vbroadcastss  0x1e8c2(%rip),%ymm9        # 2e540 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   DB  196,65,44,87,201                    ; vxorps        %ymm9,%ymm10,%ymm9
   DB  196,65,124,92,210                   ; vsubps        %ymm10,%ymm0,%ymm10
-  DB  196,98,125,24,29,6,231,1,0          ; vbroadcastss  0x1e706(%rip),%ymm11        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,29,99,231,1,0         ; vbroadcastss  0x1e763(%rip),%ymm11        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  196,65,60,89,195                    ; vmulps        %ymm11,%ymm8,%ymm8
   DB  196,65,60,89,210                    ; vmulps        %ymm10,%ymm8,%ymm10
   DB  197,180,92,192                      ; vsubps        %ymm0,%ymm9,%ymm0
@@ -14827,7 +14806,7 @@
   DB  196,67,121,4,201,0                  ; vpermilps     $0x0,%xmm9,%xmm9
   DB  196,67,53,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm9,%ymm9
   DB  197,52,88,200                       ; vaddps        %ymm0,%ymm9,%ymm9
-  DB  196,98,125,24,21,31,232,1,0         ; vbroadcastss  0x1e81f(%rip),%ymm10        # 2e54c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
+  DB  196,98,125,24,21,104,232,1,0        ; vbroadcastss  0x1e868(%rip),%ymm10        # 2e538 <_sk_srcover_bgra_8888_sse2_lowp+0x3f0>
   DB  196,65,52,89,202                    ; vmulps        %ymm10,%ymm9,%ymm9
   DB  197,252,89,192                      ; vmulps        %ymm0,%ymm0,%ymm0
   DB  197,116,89,209                      ; vmulps        %ymm1,%ymm1,%ymm10
@@ -14836,7 +14815,7 @@
   DB  196,227,121,4,192,0                 ; vpermilps     $0x0,%xmm0,%xmm0
   DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
   DB  197,172,92,192                      ; vsubps        %ymm0,%ymm10,%ymm0
-  DB  196,98,125,24,5,247,231,1,0         ; vbroadcastss  0x1e7f7(%rip),%ymm8        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
+  DB  196,98,125,24,5,64,232,1,0          ; vbroadcastss  0x1e840(%rip),%ymm8        # 2e540 <_sk_srcover_bgra_8888_sse2_lowp+0x3f8>
   DB  196,193,124,87,192                  ; vxorps        %ymm8,%ymm0,%ymm0
   DB  196,193,124,94,193                  ; vdivps        %ymm9,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -14874,7 +14853,7 @@
 PUBLIC _sk_save_xy_avx
 _sk_save_xy_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,29,230,1,0          ; vbroadcastss  0x1e61d(%rip),%ymm8        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,5,122,230,1,0         ; vbroadcastss  0x1e67a(%rip),%ymm8        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  196,65,124,88,200                   ; vaddps        %ymm8,%ymm0,%ymm9
   DB  196,67,125,8,209,1                  ; vroundps      $0x1,%ymm9,%ymm10
   DB  196,65,52,92,202                    ; vsubps        %ymm10,%ymm9,%ymm9
@@ -14907,9 +14886,9 @@
 PUBLIC _sk_bilinear_nx_avx
 _sk_bilinear_nx_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,3,231,1,0          ; vbroadcastss  0x1e703(%rip),%ymm0        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  DB  196,226,125,24,5,76,231,1,0         ; vbroadcastss  0x1e74c(%rip),%ymm0        # 2e544 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,150,229,1,0         ; vbroadcastss  0x1e596(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,98,125,24,5,243,229,1,0         ; vbroadcastss  0x1e5f3(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,60,92,128,128,0,0,0             ; vsubps        0x80(%rax),%ymm8,%ymm8
   DB  197,124,17,128,0,1,0,0              ; vmovups       %ymm8,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -14918,7 +14897,7 @@
 PUBLIC _sk_bilinear_px_avx
 _sk_bilinear_px_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,115,229,1,0        ; vbroadcastss  0x1e573(%rip),%ymm0        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,226,125,24,5,208,229,1,0        ; vbroadcastss  0x1e5d0(%rip),%ymm0        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,128,128,0,0,0            ; vmovups       0x80(%rax),%ymm8
   DB  197,124,17,128,0,1,0,0              ; vmovups       %ymm8,0x100(%rax)
@@ -14928,9 +14907,9 @@
 PUBLIC _sk_bilinear_ny_avx
 _sk_bilinear_ny_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,180,230,1,0       ; vbroadcastss  0x1e6b4(%rip),%ymm1        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  DB  196,226,125,24,13,253,230,1,0       ; vbroadcastss  0x1e6fd(%rip),%ymm1        # 2e544 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   DB  197,244,88,72,64                    ; vaddps        0x40(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,70,229,1,0          ; vbroadcastss  0x1e546(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,98,125,24,5,163,229,1,0         ; vbroadcastss  0x1e5a3(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,60,92,128,192,0,0,0             ; vsubps        0xc0(%rax),%ymm8,%ymm8
   DB  197,124,17,128,64,1,0,0             ; vmovups       %ymm8,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -14939,7 +14918,7 @@
 PUBLIC _sk_bilinear_py_avx
 _sk_bilinear_py_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,35,229,1,0        ; vbroadcastss  0x1e523(%rip),%ymm1        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,226,125,24,13,128,229,1,0       ; vbroadcastss  0x1e580(%rip),%ymm1        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  197,244,88,72,64                    ; vaddps        0x40(%rax),%ymm1,%ymm1
   DB  197,124,16,128,192,0,0,0            ; vmovups       0xc0(%rax),%ymm8
   DB  197,124,17,128,64,1,0,0             ; vmovups       %ymm8,0x140(%rax)
@@ -14949,14 +14928,14 @@
 PUBLIC _sk_bicubic_n3x_avx
 _sk_bicubic_n3x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,103,230,1,0        ; vbroadcastss  0x1e667(%rip),%ymm0        # 2e55c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
+  DB  196,226,125,24,5,176,230,1,0        ; vbroadcastss  0x1e6b0(%rip),%ymm0        # 2e548 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,246,228,1,0         ; vbroadcastss  0x1e4f6(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,98,125,24,5,83,229,1,0          ; vbroadcastss  0x1e553(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,60,92,128,128,0,0,0             ; vsubps        0x80(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,72,230,1,0         ; vbroadcastss  0x1e648(%rip),%ymm10        # 2e560 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  DB  196,98,125,24,21,145,230,1,0        ; vbroadcastss  0x1e691(%rip),%ymm10        # 2e54c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,70,229,1,0         ; vbroadcastss  0x1e546(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  DB  196,98,125,24,21,163,229,1,0        ; vbroadcastss  0x1e5a3(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,0,1,0,0              ; vmovups       %ymm8,0x100(%rax)
@@ -14966,19 +14945,19 @@
 PUBLIC _sk_bicubic_n1x_avx
 _sk_bicubic_n1x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,17,230,1,0         ; vbroadcastss  0x1e611(%rip),%ymm0        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  DB  196,226,125,24,5,90,230,1,0         ; vbroadcastss  0x1e65a(%rip),%ymm0        # 2e544 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,164,228,1,0         ; vbroadcastss  0x1e4a4(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,98,125,24,5,1,229,1,0           ; vbroadcastss  0x1e501(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,60,92,128,128,0,0,0             ; vsubps        0x80(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,3,230,1,0          ; vbroadcastss  0x1e603(%rip),%ymm9        # 2e568 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  DB  196,98,125,24,13,76,230,1,0         ; vbroadcastss  0x1e64c(%rip),%ymm9        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,241,229,1,0        ; vbroadcastss  0x1e5f1(%rip),%ymm10        # 2e564 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  DB  196,98,125,24,21,58,230,1,0         ; vbroadcastss  0x1e63a(%rip),%ymm10        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,110,228,1,0        ; vbroadcastss  0x1e46e(%rip),%ymm10        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,21,203,228,1,0        ; vbroadcastss  0x1e4cb(%rip),%ymm10        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,211,229,1,0        ; vbroadcastss  0x1e5d3(%rip),%ymm9        # 2e56c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  DB  196,98,125,24,13,28,230,1,0         ; vbroadcastss  0x1e61c(%rip),%ymm9        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,0,1,0,0              ; vmovups       %ymm8,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -14987,17 +14966,17 @@
 PUBLIC _sk_bicubic_p1x_avx
 _sk_bicubic_p1x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,63,228,1,0          ; vbroadcastss  0x1e43f(%rip),%ymm8        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,5,156,228,1,0         ; vbroadcastss  0x1e49c(%rip),%ymm8        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  197,188,88,0                        ; vaddps        (%rax),%ymm8,%ymm0
   DB  197,124,16,136,128,0,0,0            ; vmovups       0x80(%rax),%ymm9
-  DB  196,98,125,24,21,158,229,1,0        ; vbroadcastss  0x1e59e(%rip),%ymm10        # 2e568 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  DB  196,98,125,24,21,231,229,1,0        ; vbroadcastss  0x1e5e7(%rip),%ymm10        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
-  DB  196,98,125,24,29,140,229,1,0        ; vbroadcastss  0x1e58c(%rip),%ymm11        # 2e564 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  DB  196,98,125,24,29,213,229,1,0        ; vbroadcastss  0x1e5d5(%rip),%ymm11        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
   DB  196,65,44,88,192                    ; vaddps        %ymm8,%ymm10,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
-  DB  196,98,125,24,13,119,229,1,0        ; vbroadcastss  0x1e577(%rip),%ymm9        # 2e56c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  DB  196,98,125,24,13,192,229,1,0        ; vbroadcastss  0x1e5c0(%rip),%ymm9        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,0,1,0,0              ; vmovups       %ymm8,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -15006,13 +14985,13 @@
 PUBLIC _sk_bicubic_p3x_avx
 _sk_bicubic_p3x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,83,229,1,0         ; vbroadcastss  0x1e553(%rip),%ymm0        # 2e564 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  DB  196,226,125,24,5,156,229,1,0        ; vbroadcastss  0x1e59c(%rip),%ymm0        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,128,128,0,0,0            ; vmovups       0x80(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,53,229,1,0         ; vbroadcastss  0x1e535(%rip),%ymm10        # 2e560 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  DB  196,98,125,24,21,126,229,1,0        ; vbroadcastss  0x1e57e(%rip),%ymm10        # 2e54c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,51,228,1,0         ; vbroadcastss  0x1e433(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  DB  196,98,125,24,21,144,228,1,0        ; vbroadcastss  0x1e490(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,0,1,0,0              ; vmovups       %ymm8,0x100(%rax)
@@ -15022,14 +15001,14 @@
 PUBLIC _sk_bicubic_n3y_avx
 _sk_bicubic_n3y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,2,229,1,0         ; vbroadcastss  0x1e502(%rip),%ymm1        # 2e55c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
+  DB  196,226,125,24,13,75,229,1,0        ; vbroadcastss  0x1e54b(%rip),%ymm1        # 2e548 <_sk_srcover_bgra_8888_sse2_lowp+0x400>
   DB  197,244,88,72,64                    ; vaddps        0x40(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,144,227,1,0         ; vbroadcastss  0x1e390(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,98,125,24,5,237,227,1,0         ; vbroadcastss  0x1e3ed(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,60,92,128,192,0,0,0             ; vsubps        0xc0(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,226,228,1,0        ; vbroadcastss  0x1e4e2(%rip),%ymm10        # 2e560 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  DB  196,98,125,24,21,43,229,1,0         ; vbroadcastss  0x1e52b(%rip),%ymm10        # 2e54c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,224,227,1,0        ; vbroadcastss  0x1e3e0(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  DB  196,98,125,24,21,61,228,1,0         ; vbroadcastss  0x1e43d(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,64,1,0,0             ; vmovups       %ymm8,0x140(%rax)
@@ -15039,19 +15018,19 @@
 PUBLIC _sk_bicubic_n1y_avx
 _sk_bicubic_n1y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,171,228,1,0       ; vbroadcastss  0x1e4ab(%rip),%ymm1        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
+  DB  196,226,125,24,13,244,228,1,0       ; vbroadcastss  0x1e4f4(%rip),%ymm1        # 2e544 <_sk_srcover_bgra_8888_sse2_lowp+0x3fc>
   DB  197,244,88,72,64                    ; vaddps        0x40(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,61,227,1,0          ; vbroadcastss  0x1e33d(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,98,125,24,5,154,227,1,0         ; vbroadcastss  0x1e39a(%rip),%ymm8        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,60,92,128,192,0,0,0             ; vsubps        0xc0(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,156,228,1,0        ; vbroadcastss  0x1e49c(%rip),%ymm9        # 2e568 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  DB  196,98,125,24,13,229,228,1,0        ; vbroadcastss  0x1e4e5(%rip),%ymm9        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,138,228,1,0        ; vbroadcastss  0x1e48a(%rip),%ymm10        # 2e564 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  DB  196,98,125,24,21,211,228,1,0        ; vbroadcastss  0x1e4d3(%rip),%ymm10        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,7,227,1,0          ; vbroadcastss  0x1e307(%rip),%ymm10        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,21,100,227,1,0        ; vbroadcastss  0x1e364(%rip),%ymm10        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,108,228,1,0        ; vbroadcastss  0x1e46c(%rip),%ymm9        # 2e56c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  DB  196,98,125,24,13,181,228,1,0        ; vbroadcastss  0x1e4b5(%rip),%ymm9        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,64,1,0,0             ; vmovups       %ymm8,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -15060,17 +15039,17 @@
 PUBLIC _sk_bicubic_p1y_avx
 _sk_bicubic_p1y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,216,226,1,0         ; vbroadcastss  0x1e2d8(%rip),%ymm8        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
+  DB  196,98,125,24,5,53,227,1,0          ; vbroadcastss  0x1e335(%rip),%ymm8        # 2e3f4 <_sk_srcover_bgra_8888_sse2_lowp+0x2ac>
   DB  197,188,88,72,64                    ; vaddps        0x40(%rax),%ymm8,%ymm1
   DB  197,124,16,136,192,0,0,0            ; vmovups       0xc0(%rax),%ymm9
-  DB  196,98,125,24,21,54,228,1,0         ; vbroadcastss  0x1e436(%rip),%ymm10        # 2e568 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  DB  196,98,125,24,21,127,228,1,0        ; vbroadcastss  0x1e47f(%rip),%ymm10        # 2e554 <_sk_srcover_bgra_8888_sse2_lowp+0x40c>
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
-  DB  196,98,125,24,29,36,228,1,0         ; vbroadcastss  0x1e424(%rip),%ymm11        # 2e564 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  DB  196,98,125,24,29,109,228,1,0        ; vbroadcastss  0x1e46d(%rip),%ymm11        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
   DB  196,65,44,88,192                    ; vaddps        %ymm8,%ymm10,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
-  DB  196,98,125,24,13,15,228,1,0         ; vbroadcastss  0x1e40f(%rip),%ymm9        # 2e56c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  DB  196,98,125,24,13,88,228,1,0         ; vbroadcastss  0x1e458(%rip),%ymm9        # 2e558 <_sk_srcover_bgra_8888_sse2_lowp+0x410>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,64,1,0,0             ; vmovups       %ymm8,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -15079,13 +15058,13 @@
 PUBLIC _sk_bicubic_p3y_avx
 _sk_bicubic_p3y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,235,227,1,0       ; vbroadcastss  0x1e3eb(%rip),%ymm1        # 2e564 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  DB  196,226,125,24,13,52,228,1,0        ; vbroadcastss  0x1e434(%rip),%ymm1        # 2e550 <_sk_srcover_bgra_8888_sse2_lowp+0x408>
   DB  197,244,88,72,64                    ; vaddps        0x40(%rax),%ymm1,%ymm1
   DB  197,124,16,128,192,0,0,0            ; vmovups       0xc0(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,204,227,1,0        ; vbroadcastss  0x1e3cc(%rip),%ymm10        # 2e560 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
+  DB  196,98,125,24,21,21,228,1,0         ; vbroadcastss  0x1e415(%rip),%ymm10        # 2e54c <_sk_srcover_bgra_8888_sse2_lowp+0x404>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,202,226,1,0        ; vbroadcastss  0x1e2ca(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
+  DB  196,98,125,24,21,39,227,1,0         ; vbroadcastss  0x1e327(%rip),%ymm10        # 2e46c <_sk_srcover_bgra_8888_sse2_lowp+0x324>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,64,1,0,0             ; vmovups       %ymm8,0x140(%rax)
@@ -15221,7 +15200,7 @@
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  197,217,254,207                     ; vpaddd        %xmm7,%xmm4,%xmm1
   DB  197,249,254,193                     ; vpaddd        %xmm1,%xmm0,%xmm0
-  DB  196,226,121,24,37,110,225,1,0       ; vbroadcastss  0x1e16e(%rip),%xmm4        # 2e574 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  DB  196,226,121,24,37,183,225,1,0       ; vbroadcastss  0x1e1b7(%rip),%xmm4        # 2e560 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
   DB  196,98,121,64,228                   ; vpmulld       %xmm4,%xmm0,%xmm12
   DB  196,226,49,64,198                   ; vpmulld       %xmm6,%xmm9,%xmm0
   DB  197,249,127,132,36,0,1,0,0          ; vmovdqa       %xmm0,0x100(%rsp)
@@ -15275,7 +15254,7 @@
   DB  196,161,122,16,60,128               ; vmovss        (%rax,%r8,4),%xmm7
   DB  196,227,73,33,247,48                ; vinsertps     $0x30,%xmm7,%xmm6,%xmm6
   DB  196,227,77,24,237,1                 ; vinsertf128   $0x1,%xmm5,%ymm6,%ymm5
-  DB  196,98,121,24,21,211,222,1,0        ; vbroadcastss  0x1ded3(%rip),%xmm10        # 2e404 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
+  DB  196,98,121,24,21,48,223,1,0         ; vbroadcastss  0x1df30(%rip),%xmm10        # 2e404 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
   DB  196,193,105,254,210                 ; vpaddd        %xmm10,%xmm2,%xmm2
   DB  196,195,249,22,208,1                ; vpextrq       $0x1,%xmm2,%r8
   DB  196,193,249,126,209                 ; vmovq         %xmm2,%r9
@@ -15303,7 +15282,7 @@
   DB  196,161,122,16,60,128               ; vmovss        (%rax,%r8,4),%xmm7
   DB  196,227,73,33,247,48                ; vinsertps     $0x30,%xmm7,%xmm6,%xmm6
   DB  196,227,77,24,210,1                 ; vinsertf128   $0x1,%xmm2,%ymm6,%ymm2
-  DB  196,226,125,24,61,171,223,1,0       ; vbroadcastss  0x1dfab(%rip),%ymm7        # 2e570 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  DB  196,226,125,24,61,244,223,1,0       ; vbroadcastss  0x1dff4(%rip),%ymm7        # 2e55c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
   DB  197,148,88,247                      ; vaddps        %ymm7,%ymm13,%ymm6
   DB  197,124,40,231                      ; vmovaps       %ymm7,%ymm12
   DB  197,124,17,164,36,128,0,0,0         ; vmovups       %ymm12,0x80(%rsp)
@@ -16044,7 +16023,7 @@
   DB  197,249,127,132,36,160,0,0,0        ; vmovdqa       %xmm0,0xa0(%rsp)
   DB  197,225,254,216                     ; vpaddd        %xmm0,%xmm3,%xmm3
   DB  197,233,254,195                     ; vpaddd        %xmm3,%xmm2,%xmm0
-  DB  196,98,121,24,13,60,208,1,0         ; vbroadcastss  0x1d03c(%rip),%xmm9        # 2e574 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  DB  196,98,121,24,13,133,208,1,0        ; vbroadcastss  0x1d085(%rip),%xmm9        # 2e560 <_sk_srcover_bgra_8888_sse2_lowp+0x418>
   DB  196,194,121,64,209                  ; vpmulld       %xmm9,%xmm0,%xmm2
   DB  196,226,81,64,199                   ; vpmulld       %xmm7,%xmm5,%xmm0
   DB  197,249,127,132,36,224,1,0,0        ; vmovdqa       %xmm0,0x1e0(%rsp)
@@ -16101,7 +16080,7 @@
   DB  196,161,122,16,60,128               ; vmovss        (%rax,%r8,4),%xmm7
   DB  196,227,73,33,247,48                ; vinsertps     $0x30,%xmm7,%xmm6,%xmm6
   DB  196,227,77,24,237,1                 ; vinsertf128   $0x1,%xmm5,%ymm6,%ymm5
-  DB  196,98,121,24,21,145,205,1,0        ; vbroadcastss  0x1cd91(%rip),%xmm10        # 2e404 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
+  DB  196,98,121,24,21,238,205,1,0        ; vbroadcastss  0x1cdee(%rip),%xmm10        # 2e404 <_sk_srcover_bgra_8888_sse2_lowp+0x2bc>
   DB  196,193,121,254,194                 ; vpaddd        %xmm10,%xmm0,%xmm0
   DB  196,195,249,22,192,1                ; vpextrq       $0x1,%xmm0,%r8
   DB  196,193,249,126,193                 ; vmovq         %xmm0,%r9
@@ -16129,7 +16108,7 @@
   DB  196,161,122,16,52,128               ; vmovss        (%rax,%r8,4),%xmm6
   DB  196,227,105,33,214,48               ; vinsertps     $0x30,%xmm6,%xmm2,%xmm2
   DB  196,227,109,24,208,1                ; vinsertf128   $0x1,%xmm0,%ymm2,%ymm2
-  DB  196,98,125,24,37,105,206,1,0        ; vbroadcastss  0x1ce69(%rip),%ymm12        # 2e570 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  DB  196,98,125,24,37,178,206,1,0        ; vbroadcastss  0x1ceb2(%rip),%ymm12        # 2e55c <_sk_srcover_bgra_8888_sse2_lowp+0x414>
   DB  196,193,4,88,196                    ; vaddps        %ymm12,%ymm15,%ymm0
   DB  197,124,17,164,36,128,0,0,0         ; vmovups       %ymm12,0x80(%rsp)
   DB  197,254,91,192                      ; vcvttps2dq    %ymm0,%ymm0
@@ -17527,7 +17506,7 @@
   DB  197,228,89,210                      ; vmulps        %ymm2,%ymm3,%ymm2
   DB  197,220,88,210                      ; vaddps        %ymm2,%ymm4,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,107,175,1,0       ; vbroadcastss  0x1af6b(%rip),%ymm3        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
+  DB  196,226,125,24,29,200,175,1,0       ; vbroadcastss  0x1afc8(%rip),%ymm3        # 2e3f8 <_sk_srcover_bgra_8888_sse2_lowp+0x2b0>
   DB  197,252,16,164,36,192,2,0,0         ; vmovups       0x2c0(%rsp),%ymm4
   DB  197,252,16,172,36,224,2,0,0         ; vmovups       0x2e0(%rsp),%ymm5
   DB  197,252,16,180,36,0,3,0,0           ; vmovups       0x300(%rsp),%ymm6
@@ -17539,6 +17518,27 @@
   DB  65,95                               ; pop           %r15
   DB  255,224                             ; jmpq          *%rax
 
+PUBLIC _sk_gauss_a_to_rgba_avx
+_sk_gauss_a_to_rgba_avx LABEL PROC
+  DB  196,226,125,24,5,251,176,1,0        ; vbroadcastss  0x1b0fb(%rip),%ymm0        # 2e568 <_sk_srcover_bgra_8888_sse2_lowp+0x420>
+  DB  197,228,89,192                      ; vmulps        %ymm0,%ymm3,%ymm0
+  DB  196,226,125,24,13,234,176,1,0       ; vbroadcastss  0x1b0ea(%rip),%ymm1        # 2e564 <_sk_srcover_bgra_8888_sse2_lowp+0x41c>
+  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
+  DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
+  DB  196,226,125,24,13,225,176,1,0       ; vbroadcastss  0x1b0e1(%rip),%ymm1        # 2e56c <_sk_srcover_bgra_8888_sse2_lowp+0x424>
+  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
+  DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
+  DB  196,226,125,24,13,212,176,1,0       ; vbroadcastss  0x1b0d4(%rip),%ymm1        # 2e570 <_sk_srcover_bgra_8888_sse2_lowp+0x428>
+  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
+  DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
+  DB  196,226,125,24,13,199,176,1,0       ; vbroadcastss  0x1b0c7(%rip),%ymm1        # 2e574 <_sk_srcover_bgra_8888_sse2_lowp+0x42c>
+  DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
+  DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
+  DB  197,252,40,216                      ; vmovaps       %ymm0,%ymm3
+  DB  255,224                             ; jmpq          *%rax
+
 PUBLIC _sk_start_pipeline_sse41
 _sk_start_pipeline_sse41 LABEL PROC
   DB  85                                  ; push          %rbp
@@ -23324,23 +23324,6 @@
   DB  65,94                               ; pop           %r14
   DB  255,224                             ; jmpq          *%rax
 
-PUBLIC _sk_gauss_a_to_rgba_sse41
-_sk_gauss_a_to_rgba_sse41 LABEL PROC
-  DB  15,40,5,71,98,1,0                   ; movaps        0x16247(%rip),%xmm0        # 2f200 <_sk_srcover_bgra_8888_sse2_lowp+0x10b8>
-  DB  15,89,195                           ; mulps         %xmm3,%xmm0
-  DB  15,88,5,77,98,1,0                   ; addps         0x1624d(%rip),%xmm0        # 2f210 <_sk_srcover_bgra_8888_sse2_lowp+0x10c8>
-  DB  15,89,195                           ; mulps         %xmm3,%xmm0
-  DB  15,88,5,83,98,1,0                   ; addps         0x16253(%rip),%xmm0        # 2f220 <_sk_srcover_bgra_8888_sse2_lowp+0x10d8>
-  DB  15,89,195                           ; mulps         %xmm3,%xmm0
-  DB  15,88,5,89,98,1,0                   ; addps         0x16259(%rip),%xmm0        # 2f230 <_sk_srcover_bgra_8888_sse2_lowp+0x10e8>
-  DB  15,89,195                           ; mulps         %xmm3,%xmm0
-  DB  15,88,5,95,98,1,0                   ; addps         0x1625f(%rip),%xmm0        # 2f240 <_sk_srcover_bgra_8888_sse2_lowp+0x10f8>
-  DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,200                           ; movaps        %xmm0,%xmm1
-  DB  15,40,208                           ; movaps        %xmm0,%xmm2
-  DB  15,40,216                           ; movaps        %xmm0,%xmm3
-  DB  255,224                             ; jmpq          *%rax
-
 PUBLIC _sk_gradient_sse41
 _sk_gradient_sse41 LABEL PROC
   DB  65,86                               ; push          %r14
@@ -23349,7 +23332,7 @@
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
   DB  73,131,248,2                        ; cmp           $0x2,%r8
-  DB  114,41                              ; jb            19029 <_sk_gradient_sse41+0x3b>
+  DB  114,41                              ; jb            18fed <_sk_gradient_sse41+0x3b>
   DB  76,139,72,72                        ; mov           0x48(%rax),%r9
   DB  73,255,200                          ; dec           %r8
   DB  73,131,193,4                        ; add           $0x4,%r9
@@ -23360,7 +23343,7 @@
   DB  102,15,250,202                      ; psubd         %xmm2,%xmm1
   DB  73,131,193,4                        ; add           $0x4,%r9
   DB  73,255,200                          ; dec           %r8
-  DB  117,230                             ; jne           1900f <_sk_gradient_sse41+0x21>
+  DB  117,230                             ; jne           18fd3 <_sk_gradient_sse41+0x21>
   DB  102,73,15,58,22,200,1               ; pextrq        $0x1,%xmm1,%r8
   DB  69,137,193                          ; mov           %r8d,%r9d
   DB  73,193,232,32                       ; shr           $0x20,%r8
@@ -23485,26 +23468,26 @@
   DB  69,15,94,226                        ; divps         %xmm10,%xmm12
   DB  69,15,40,236                        ; movaps        %xmm12,%xmm13
   DB  69,15,89,237                        ; mulps         %xmm13,%xmm13
-  DB  68,15,40,21,214,95,1,0              ; movaps        0x15fd6(%rip),%xmm10        # 2f250 <_sk_srcover_bgra_8888_sse2_lowp+0x1108>
+  DB  68,15,40,21,194,95,1,0              ; movaps        0x15fc2(%rip),%xmm10        # 2f200 <_sk_srcover_bgra_8888_sse2_lowp+0x10b8>
   DB  69,15,89,213                        ; mulps         %xmm13,%xmm10
-  DB  68,15,88,21,218,95,1,0              ; addps         0x15fda(%rip),%xmm10        # 2f260 <_sk_srcover_bgra_8888_sse2_lowp+0x1118>
+  DB  68,15,88,21,198,95,1,0              ; addps         0x15fc6(%rip),%xmm10        # 2f210 <_sk_srcover_bgra_8888_sse2_lowp+0x10c8>
   DB  69,15,89,213                        ; mulps         %xmm13,%xmm10
-  DB  68,15,88,21,222,95,1,0              ; addps         0x15fde(%rip),%xmm10        # 2f270 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
+  DB  68,15,88,21,202,95,1,0              ; addps         0x15fca(%rip),%xmm10        # 2f220 <_sk_srcover_bgra_8888_sse2_lowp+0x10d8>
   DB  69,15,89,213                        ; mulps         %xmm13,%xmm10
-  DB  68,15,88,21,226,95,1,0              ; addps         0x15fe2(%rip),%xmm10        # 2f280 <_sk_srcover_bgra_8888_sse2_lowp+0x1138>
+  DB  68,15,88,21,206,95,1,0              ; addps         0x15fce(%rip),%xmm10        # 2f230 <_sk_srcover_bgra_8888_sse2_lowp+0x10e8>
   DB  69,15,89,212                        ; mulps         %xmm12,%xmm10
   DB  65,15,194,195,1                     ; cmpltps       %xmm11,%xmm0
-  DB  68,15,40,29,225,95,1,0              ; movaps        0x15fe1(%rip),%xmm11        # 2f290 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  DB  68,15,40,29,205,95,1,0              ; movaps        0x15fcd(%rip),%xmm11        # 2f240 <_sk_srcover_bgra_8888_sse2_lowp+0x10f8>
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
   DB  102,69,15,56,20,211                 ; blendvps      %xmm0,%xmm11,%xmm10
   DB  69,15,194,200,1                     ; cmpltps       %xmm8,%xmm9
-  DB  68,15,40,29,154,89,1,0              ; movaps        0x1599a(%rip),%xmm11        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,40,29,214,89,1,0              ; movaps        0x159d6(%rip),%xmm11        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  102,69,15,56,20,211                 ; blendvps      %xmm0,%xmm11,%xmm10
   DB  15,40,193                           ; movaps        %xmm1,%xmm0
   DB  65,15,194,192,1                     ; cmpltps       %xmm8,%xmm0
-  DB  68,15,40,13,140,89,1,0              ; movaps        0x1598c(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  68,15,40,13,200,89,1,0              ; movaps        0x159c8(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,92,202                        ; subps         %xmm10,%xmm9
   DB  102,69,15,56,20,209                 ; blendvps      %xmm0,%xmm9,%xmm10
   DB  69,15,194,194,7                     ; cmpordps      %xmm10,%xmm8
@@ -23534,7 +23517,7 @@
   DB  243,69,15,89,203                    ; mulss         %xmm11,%xmm9
   DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
   DB  68,15,88,200                        ; addps         %xmm0,%xmm9
-  DB  68,15,89,13,85,95,1,0               ; mulps         0x15f55(%rip),%xmm9        # 2f2a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
+  DB  68,15,89,13,65,95,1,0               ; mulps         0x15f41(%rip),%xmm9        # 2f250 <_sk_srcover_bgra_8888_sse2_lowp+0x1108>
   DB  15,89,192                           ; mulps         %xmm0,%xmm0
   DB  68,15,40,225                        ; movaps        %xmm1,%xmm12
   DB  69,15,89,228                        ; mulps         %xmm12,%xmm12
@@ -23542,7 +23525,7 @@
   DB  243,69,15,89,219                    ; mulss         %xmm11,%xmm11
   DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
   DB  69,15,92,227                        ; subps         %xmm11,%xmm12
-  DB  68,15,89,21,64,95,1,0               ; mulps         0x15f40(%rip),%xmm10        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
+  DB  68,15,89,21,44,95,1,0               ; mulps         0x15f2c(%rip),%xmm10        # 2f260 <_sk_srcover_bgra_8888_sse2_lowp+0x1118>
   DB  69,15,89,212                        ; mulps         %xmm12,%xmm10
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  15,89,192                           ; mulps         %xmm0,%xmm0
@@ -23551,8 +23534,8 @@
   DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
   DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
   DB  65,15,92,193                        ; subps         %xmm9,%xmm0
-  DB  68,15,87,13,232,93,1,0              ; xorps         0x15de8(%rip),%xmm9        # 2f180 <_sk_srcover_bgra_8888_sse2_lowp+0x1038>
-  DB  68,15,89,5,192,88,1,0               ; mulps         0x158c0(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,87,13,36,94,1,0               ; xorps         0x15e24(%rip),%xmm9        # 2f180 <_sk_srcover_bgra_8888_sse2_lowp+0x1038>
+  DB  68,15,89,5,252,88,1,0               ; mulps         0x158fc(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  69,15,92,202                        ; subps         %xmm10,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
@@ -23571,7 +23554,7 @@
   DB  243,69,15,89,203                    ; mulss         %xmm11,%xmm9
   DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
   DB  68,15,88,200                        ; addps         %xmm0,%xmm9
-  DB  68,15,89,13,183,94,1,0              ; mulps         0x15eb7(%rip),%xmm9        # 2f2a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
+  DB  68,15,89,13,163,94,1,0              ; mulps         0x15ea3(%rip),%xmm9        # 2f250 <_sk_srcover_bgra_8888_sse2_lowp+0x1108>
   DB  15,89,192                           ; mulps         %xmm0,%xmm0
   DB  68,15,40,225                        ; movaps        %xmm1,%xmm12
   DB  69,15,89,228                        ; mulps         %xmm12,%xmm12
@@ -23579,7 +23562,7 @@
   DB  243,69,15,89,219                    ; mulss         %xmm11,%xmm11
   DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
   DB  69,15,92,227                        ; subps         %xmm11,%xmm12
-  DB  68,15,89,21,162,94,1,0              ; mulps         0x15ea2(%rip),%xmm10        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
+  DB  68,15,89,21,142,94,1,0              ; mulps         0x15e8e(%rip),%xmm10        # 2f260 <_sk_srcover_bgra_8888_sse2_lowp+0x1118>
   DB  69,15,89,212                        ; mulps         %xmm12,%xmm10
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  15,89,192                           ; mulps         %xmm0,%xmm0
@@ -23588,8 +23571,8 @@
   DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
   DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
   DB  65,15,92,193                        ; subps         %xmm9,%xmm0
-  DB  68,15,87,13,74,93,1,0               ; xorps         0x15d4a(%rip),%xmm9        # 2f180 <_sk_srcover_bgra_8888_sse2_lowp+0x1038>
-  DB  68,15,89,5,34,88,1,0                ; mulps         0x15822(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,87,13,134,93,1,0              ; xorps         0x15d86(%rip),%xmm9        # 2f180 <_sk_srcover_bgra_8888_sse2_lowp+0x1038>
+  DB  68,15,89,5,94,88,1,0                ; mulps         0x1585e(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  69,15,92,202                        ; subps         %xmm10,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
@@ -23605,7 +23588,7 @@
   DB  243,69,15,89,200                    ; mulss         %xmm8,%xmm9
   DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
   DB  68,15,88,200                        ; addps         %xmm0,%xmm9
-  DB  68,15,89,13,42,94,1,0               ; mulps         0x15e2a(%rip),%xmm9        # 2f2a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
+  DB  68,15,89,13,22,94,1,0               ; mulps         0x15e16(%rip),%xmm9        # 2f250 <_sk_srcover_bgra_8888_sse2_lowp+0x1108>
   DB  15,89,192                           ; mulps         %xmm0,%xmm0
   DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
   DB  69,15,89,210                        ; mulps         %xmm10,%xmm10
@@ -23613,7 +23596,7 @@
   DB  243,69,15,89,192                    ; mulss         %xmm8,%xmm8
   DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
   DB  65,15,92,192                        ; subps         %xmm8,%xmm0
-  DB  15,87,5,230,92,1,0                  ; xorps         0x15ce6(%rip),%xmm0        # 2f180 <_sk_srcover_bgra_8888_sse2_lowp+0x1038>
+  DB  15,87,5,34,93,1,0                   ; xorps         0x15d22(%rip),%xmm0        # 2f180 <_sk_srcover_bgra_8888_sse2_lowp+0x1038>
   DB  65,15,94,193                        ; divps         %xmm9,%xmm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -23655,7 +23638,7 @@
 PUBLIC _sk_save_xy_sse41
 _sk_save_xy_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,69,87,1,0                ; movaps        0x15745(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,40,5,129,87,1,0               ; movaps        0x15781(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  15,17,0                             ; movups        %xmm0,(%rax)
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,88,200                        ; addps         %xmm8,%xmm9
@@ -23695,8 +23678,8 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,128,128,0,0,0              ; movups        0x80(%rax),%xmm8
-  DB  15,88,5,14,93,1,0                   ; addps         0x15d0e(%rip),%xmm0        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
-  DB  68,15,40,13,182,86,1,0              ; movaps        0x156b6(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  15,88,5,250,92,1,0                  ; addps         0x15cfa(%rip),%xmm0        # 2f270 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
+  DB  68,15,40,13,242,86,1,0              ; movaps        0x156f2(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  68,15,17,136,0,1,0,0                ; movups        %xmm9,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -23707,7 +23690,7 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,128,128,0,0,0              ; movups        0x80(%rax),%xmm8
-  DB  15,88,5,130,86,1,0                  ; addps         0x15682(%rip),%xmm0        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  15,88,5,190,86,1,0                  ; addps         0x156be(%rip),%xmm0        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  68,15,17,128,0,1,0,0                ; movups        %xmm8,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -23717,8 +23700,8 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,64                         ; movups        0x40(%rax),%xmm1
   DB  68,15,16,128,192,0,0,0              ; movups        0xc0(%rax),%xmm8
-  DB  15,88,13,193,92,1,0                 ; addps         0x15cc1(%rip),%xmm1        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
-  DB  68,15,40,13,105,86,1,0              ; movaps        0x15669(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  15,88,13,173,92,1,0                 ; addps         0x15cad(%rip),%xmm1        # 2f270 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
+  DB  68,15,40,13,165,86,1,0              ; movaps        0x156a5(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  68,15,17,136,64,1,0,0               ; movups        %xmm9,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -23729,7 +23712,7 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,64                         ; movups        0x40(%rax),%xmm1
   DB  68,15,16,128,192,0,0,0              ; movups        0xc0(%rax),%xmm8
-  DB  15,88,13,52,86,1,0                  ; addps         0x15634(%rip),%xmm1        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  15,88,13,112,86,1,0                 ; addps         0x15670(%rip),%xmm1        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  68,15,17,128,64,1,0,0               ; movups        %xmm8,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -23739,13 +23722,13 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,128,128,0,0,0              ; movups        0x80(%rax),%xmm8
-  DB  15,88,5,132,92,1,0                  ; addps         0x15c84(%rip),%xmm0        # 2f2d0 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
-  DB  68,15,40,13,28,86,1,0               ; movaps        0x1561c(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  15,88,5,112,92,1,0                  ; addps         0x15c70(%rip),%xmm0        # 2f280 <_sk_srcover_bgra_8888_sse2_lowp+0x1138>
+  DB  68,15,40,13,88,86,1,0               ; movaps        0x15658(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
   DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
-  DB  68,15,89,13,120,92,1,0              ; mulps         0x15c78(%rip),%xmm9        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  DB  68,15,88,13,0,88,1,0                ; addps         0x15800(%rip),%xmm9        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
+  DB  68,15,89,13,100,92,1,0              ; mulps         0x15c64(%rip),%xmm9        # 2f290 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  DB  68,15,88,13,60,88,1,0               ; addps         0x1583c(%rip),%xmm9        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  68,15,17,136,0,1,0,0                ; movups        %xmm9,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -23756,16 +23739,16 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,128,128,0,0,0              ; movups        0x80(%rax),%xmm8
-  DB  15,88,5,44,92,1,0                   ; addps         0x15c2c(%rip),%xmm0        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
-  DB  68,15,40,13,212,85,1,0              ; movaps        0x155d4(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  15,88,5,24,92,1,0                   ; addps         0x15c18(%rip),%xmm0        # 2f270 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
+  DB  68,15,40,13,16,86,1,0               ; movaps        0x15610(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
-  DB  68,15,40,5,72,92,1,0                ; movaps        0x15c48(%rip),%xmm8        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
+  DB  68,15,40,5,52,92,1,0                ; movaps        0x15c34(%rip),%xmm8        # 2f2a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,76,92,1,0                ; addps         0x15c4c(%rip),%xmm8        # 2f300 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
+  DB  68,15,88,5,56,92,1,0                ; addps         0x15c38(%rip),%xmm8        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,160,85,1,0               ; addps         0x155a0(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,88,5,220,85,1,0               ; addps         0x155dc(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,68,92,1,0                ; addps         0x15c44(%rip),%xmm8        # 2f310 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  DB  68,15,88,5,48,92,1,0                ; addps         0x15c30(%rip),%xmm8        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   DB  68,15,17,128,0,1,0,0                ; movups        %xmm8,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -23773,17 +23756,17 @@
 PUBLIC _sk_bicubic_p1x_sse41
 _sk_bicubic_p1x_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,126,85,1,0               ; movaps        0x1557e(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,40,5,186,85,1,0               ; movaps        0x155ba(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,136,128,0,0,0              ; movups        0x80(%rax),%xmm9
   DB  65,15,88,192                        ; addps         %xmm8,%xmm0
-  DB  68,15,40,21,247,91,1,0              ; movaps        0x15bf7(%rip),%xmm10        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
+  DB  68,15,40,21,227,91,1,0              ; movaps        0x15be3(%rip),%xmm10        # 2f2a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,251,91,1,0              ; addps         0x15bfb(%rip),%xmm10        # 2f300 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
+  DB  68,15,88,21,231,91,1,0              ; addps         0x15be7(%rip),%xmm10        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
   DB  69,15,88,208                        ; addps         %xmm8,%xmm10
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,247,91,1,0              ; addps         0x15bf7(%rip),%xmm10        # 2f310 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  DB  68,15,88,21,227,91,1,0              ; addps         0x15be3(%rip),%xmm10        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   DB  68,15,17,144,0,1,0,0                ; movups        %xmm10,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -23793,11 +23776,11 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,128,128,0,0,0              ; movups        0x80(%rax),%xmm8
-  DB  15,88,5,199,91,1,0                  ; addps         0x15bc7(%rip),%xmm0        # 2f300 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
+  DB  15,88,5,179,91,1,0                  ; addps         0x15bb3(%rip),%xmm0        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  69,15,89,201                        ; mulps         %xmm9,%xmm9
-  DB  68,15,89,5,151,91,1,0               ; mulps         0x15b97(%rip),%xmm8        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  DB  68,15,88,5,31,87,1,0                ; addps         0x1571f(%rip),%xmm8        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
+  DB  68,15,89,5,131,91,1,0               ; mulps         0x15b83(%rip),%xmm8        # 2f290 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  DB  68,15,88,5,91,87,1,0                ; addps         0x1575b(%rip),%xmm8        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  68,15,17,128,0,1,0,0                ; movups        %xmm8,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -23808,13 +23791,13 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,64                         ; movups        0x40(%rax),%xmm1
   DB  68,15,16,128,192,0,0,0              ; movups        0xc0(%rax),%xmm8
-  DB  15,88,13,90,91,1,0                  ; addps         0x15b5a(%rip),%xmm1        # 2f2d0 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
-  DB  68,15,40,13,242,84,1,0              ; movaps        0x154f2(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  15,88,13,70,91,1,0                  ; addps         0x15b46(%rip),%xmm1        # 2f280 <_sk_srcover_bgra_8888_sse2_lowp+0x1138>
+  DB  68,15,40,13,46,85,1,0               ; movaps        0x1552e(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
   DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
-  DB  68,15,89,13,78,91,1,0               ; mulps         0x15b4e(%rip),%xmm9        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  DB  68,15,88,13,214,86,1,0              ; addps         0x156d6(%rip),%xmm9        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
+  DB  68,15,89,13,58,91,1,0               ; mulps         0x15b3a(%rip),%xmm9        # 2f290 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  DB  68,15,88,13,18,87,1,0               ; addps         0x15712(%rip),%xmm9        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  68,15,17,136,64,1,0,0               ; movups        %xmm9,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -23825,16 +23808,16 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,64                         ; movups        0x40(%rax),%xmm1
   DB  68,15,16,128,192,0,0,0              ; movups        0xc0(%rax),%xmm8
-  DB  15,88,13,1,91,1,0                   ; addps         0x15b01(%rip),%xmm1        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
-  DB  68,15,40,13,169,84,1,0              ; movaps        0x154a9(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  15,88,13,237,90,1,0                 ; addps         0x15aed(%rip),%xmm1        # 2f270 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
+  DB  68,15,40,13,229,84,1,0              ; movaps        0x154e5(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
-  DB  68,15,40,5,29,91,1,0                ; movaps        0x15b1d(%rip),%xmm8        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
+  DB  68,15,40,5,9,91,1,0                 ; movaps        0x15b09(%rip),%xmm8        # 2f2a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,33,91,1,0                ; addps         0x15b21(%rip),%xmm8        # 2f300 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
+  DB  68,15,88,5,13,91,1,0                ; addps         0x15b0d(%rip),%xmm8        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,117,84,1,0               ; addps         0x15475(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,88,5,177,84,1,0               ; addps         0x154b1(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,25,91,1,0                ; addps         0x15b19(%rip),%xmm8        # 2f310 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  DB  68,15,88,5,5,91,1,0                 ; addps         0x15b05(%rip),%xmm8        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   DB  68,15,17,128,64,1,0,0               ; movups        %xmm8,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -23842,17 +23825,17 @@
 PUBLIC _sk_bicubic_p1y_sse41
 _sk_bicubic_p1y_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,83,84,1,0                ; movaps        0x15453(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,40,5,143,84,1,0               ; movaps        0x1548f(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  15,16,72,64                         ; movups        0x40(%rax),%xmm1
   DB  68,15,16,136,192,0,0,0              ; movups        0xc0(%rax),%xmm9
   DB  65,15,88,200                        ; addps         %xmm8,%xmm1
-  DB  68,15,40,21,203,90,1,0              ; movaps        0x15acb(%rip),%xmm10        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
+  DB  68,15,40,21,183,90,1,0              ; movaps        0x15ab7(%rip),%xmm10        # 2f2a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,207,90,1,0              ; addps         0x15acf(%rip),%xmm10        # 2f300 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
+  DB  68,15,88,21,187,90,1,0              ; addps         0x15abb(%rip),%xmm10        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
   DB  69,15,88,208                        ; addps         %xmm8,%xmm10
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,203,90,1,0              ; addps         0x15acb(%rip),%xmm10        # 2f310 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  DB  68,15,88,21,183,90,1,0              ; addps         0x15ab7(%rip),%xmm10        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   DB  68,15,17,144,64,1,0,0               ; movups        %xmm10,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -23862,11 +23845,11 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,64                         ; movups        0x40(%rax),%xmm1
   DB  68,15,16,128,192,0,0,0              ; movups        0xc0(%rax),%xmm8
-  DB  15,88,13,154,90,1,0                 ; addps         0x15a9a(%rip),%xmm1        # 2f300 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
+  DB  15,88,13,134,90,1,0                 ; addps         0x15a86(%rip),%xmm1        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  69,15,89,201                        ; mulps         %xmm9,%xmm9
-  DB  68,15,89,5,106,90,1,0               ; mulps         0x15a6a(%rip),%xmm8        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  DB  68,15,88,5,242,85,1,0               ; addps         0x155f2(%rip),%xmm8        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
+  DB  68,15,89,5,86,90,1,0                ; mulps         0x15a56(%rip),%xmm8        # 2f290 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  DB  68,15,88,5,46,86,1,0                ; addps         0x1562e(%rip),%xmm8        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  68,15,17,128,64,1,0,0               ; movups        %xmm8,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -23961,7 +23944,7 @@
   DB  15,89,194                           ; mulps         %xmm2,%xmm0
   DB  15,41,68,36,80                      ; movaps        %xmm0,0x50(%rsp)
   DB  243,15,91,240                       ; cvttps2dq     %xmm0,%xmm6
-  DB  15,40,37,109,89,1,0                 ; movaps        0x1596d(%rip),%xmm4        # 2f320 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  DB  15,40,37,89,89,1,0                  ; movaps        0x15959(%rip),%xmm4        # 2f2d0 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   DB  15,88,196                           ; addps         %xmm4,%xmm0
   DB  15,41,68,36,48                      ; movaps        %xmm0,0x30(%rsp)
   DB  102,65,15,110,208                   ; movd          %r8d,%xmm2
@@ -24000,7 +23983,7 @@
   DB  102,68,15,56,64,192                 ; pmulld        %xmm0,%xmm8
   DB  102,65,15,111,216                   ; movdqa        %xmm8,%xmm3
   DB  102,15,254,218                      ; paddd         %xmm2,%xmm3
-  DB  102,68,15,111,37,196,88,1,0         ; movdqa        0x158c4(%rip),%xmm12        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,68,15,111,37,176,88,1,0         ; movdqa        0x158b0(%rip),%xmm12        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,65,15,56,64,220                 ; pmulld        %xmm12,%xmm3
   DB  102,15,118,228                      ; pcmpeqd       %xmm4,%xmm4
   DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
@@ -24023,7 +24006,7 @@
   DB  102,65,15,58,22,217,1               ; pextrd        $0x1,%xmm3,%r9d
   DB  102,65,15,58,22,218,2               ; pextrd        $0x2,%xmm3,%r10d
   DB  102,65,15,58,22,219,3               ; pextrd        $0x3,%xmm3,%r11d
-  DB  102,15,111,37,184,81,1,0            ; movdqa        0x151b8(%rip),%xmm4        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,111,37,244,81,1,0            ; movdqa        0x151f4(%rip),%xmm4        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,15,254,220                      ; paddd         %xmm4,%xmm3
   DB  102,15,111,252                      ; movdqa        %xmm4,%xmm7
   DB  102,73,15,58,22,222,1               ; pextrq        $0x1,%xmm3,%r14
@@ -24196,7 +24179,7 @@
   DB  102,68,15,254,202                   ; paddd         %xmm2,%xmm9
   DB  102,65,15,111,192                   ; movdqa        %xmm8,%xmm0
   DB  102,65,15,254,193                   ; paddd         %xmm9,%xmm0
-  DB  102,15,111,45,211,84,1,0            ; movdqa        0x154d3(%rip),%xmm5        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,15,111,45,191,84,1,0            ; movdqa        0x154bf(%rip),%xmm5        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,15,56,64,197                    ; pmulld        %xmm5,%xmm0
   DB  102,15,111,200                      ; movdqa        %xmm0,%xmm1
   DB  102,15,118,246                      ; pcmpeqd       %xmm6,%xmm6
@@ -24217,7 +24200,7 @@
   DB  102,65,15,58,22,193,1               ; pextrd        $0x1,%xmm0,%r9d
   DB  102,65,15,58,22,194,2               ; pextrd        $0x2,%xmm0,%r10d
   DB  102,65,15,58,22,195,3               ; pextrd        $0x3,%xmm0,%r11d
-  DB  102,15,111,61,212,77,1,0            ; movdqa        0x14dd4(%rip),%xmm7        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,111,61,16,78,1,0             ; movdqa        0x14e10(%rip),%xmm7        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,15,254,199                      ; paddd         %xmm7,%xmm0
   DB  102,72,15,58,22,193,1               ; pextrq        $0x1,%xmm0,%rcx
   DB  102,72,15,126,195                   ; movq          %xmm0,%rbx
@@ -24283,7 +24266,7 @@
   DB  15,88,248                           ; addps         %xmm0,%xmm7
   DB  102,68,15,254,210                   ; paddd         %xmm2,%xmm10
   DB  102,69,15,254,194                   ; paddd         %xmm10,%xmm8
-  DB  102,15,111,13,15,83,1,0             ; movdqa        0x1530f(%rip),%xmm1        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,15,111,13,251,82,1,0            ; movdqa        0x152fb(%rip),%xmm1        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,68,15,56,64,193                 ; pmulld        %xmm1,%xmm8
   DB  102,65,15,111,192                   ; movdqa        %xmm8,%xmm0
   DB  102,15,118,237                      ; pcmpeqd       %xmm5,%xmm5
@@ -24304,7 +24287,7 @@
   DB  102,69,15,58,22,193,1               ; pextrd        $0x1,%xmm8,%r9d
   DB  102,69,15,58,22,194,2               ; pextrd        $0x2,%xmm8,%r10d
   DB  102,69,15,58,22,195,3               ; pextrd        $0x3,%xmm8,%r11d
-  DB  102,15,111,21,10,76,1,0             ; movdqa        0x14c0a(%rip),%xmm2        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,111,21,70,76,1,0             ; movdqa        0x14c46(%rip),%xmm2        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,68,15,254,194                   ; paddd         %xmm2,%xmm8
   DB  102,76,15,58,22,193,1               ; pextrq        $0x1,%xmm8,%rcx
   DB  102,76,15,126,195                   ; movq          %xmm8,%rbx
@@ -24429,7 +24412,7 @@
   DB  15,89,203                           ; mulps         %xmm3,%xmm1
   DB  15,41,140,36,176,0,0,0              ; movaps        %xmm1,0xb0(%rsp)
   DB  243,15,91,249                       ; cvttps2dq     %xmm1,%xmm7
-  DB  15,40,29,81,80,1,0                  ; movaps        0x15051(%rip),%xmm3        # 2f320 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  DB  15,40,29,61,80,1,0                  ; movaps        0x1503d(%rip),%xmm3        # 2f2d0 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   DB  15,88,203                           ; addps         %xmm3,%xmm1
   DB  15,41,76,36,32                      ; movaps        %xmm1,0x20(%rsp)
   DB  102,65,15,110,232                   ; movd          %r8d,%xmm5
@@ -24486,7 +24469,7 @@
   DB  102,68,15,56,64,253                 ; pmulld        %xmm5,%xmm15
   DB  102,65,15,111,223                   ; movdqa        %xmm15,%xmm3
   DB  102,15,254,217                      ; paddd         %xmm1,%xmm3
-  DB  102,68,15,111,29,75,79,1,0          ; movdqa        0x14f4b(%rip),%xmm11        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,68,15,111,29,55,79,1,0          ; movdqa        0x14f37(%rip),%xmm11        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,65,15,56,64,219                 ; pmulld        %xmm11,%xmm3
   DB  102,15,118,192                      ; pcmpeqd       %xmm0,%xmm0
   DB  102,15,111,243                      ; movdqa        %xmm3,%xmm6
@@ -24508,7 +24491,7 @@
   DB  102,65,15,58,22,217,1               ; pextrd        $0x1,%xmm3,%r9d
   DB  102,65,15,58,22,218,2               ; pextrd        $0x2,%xmm3,%r10d
   DB  102,65,15,58,22,219,3               ; pextrd        $0x3,%xmm3,%r11d
-  DB  102,68,15,111,21,67,72,1,0          ; movdqa        0x14843(%rip),%xmm10        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,68,15,111,21,127,72,1,0         ; movdqa        0x1487f(%rip),%xmm10        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,65,15,254,218                   ; paddd         %xmm10,%xmm3
   DB  102,73,15,58,22,222,1               ; pextrq        $0x1,%xmm3,%r14
   DB  102,72,15,126,219                   ; movq          %xmm3,%rbx
@@ -24526,7 +24509,7 @@
   DB  102,65,15,254,201                   ; paddd         %xmm9,%xmm1
   DB  102,65,15,56,64,203                 ; pmulld        %xmm11,%xmm1
   DB  102,15,111,217                      ; movdqa        %xmm1,%xmm3
-  DB  102,15,250,29,129,78,1,0            ; psubd         0x14e81(%rip),%xmm3        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,29,109,78,1,0            ; psubd         0x14e6d(%rip),%xmm3        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,73,15,58,22,222,1               ; pextrq        $0x1,%xmm3,%r14
   DB  102,72,15,126,219                   ; movq          %xmm3,%rbx
   DB  65,137,223                          ; mov           %ebx,%r15d
@@ -24581,7 +24564,7 @@
   DB  102,65,15,254,199                   ; paddd         %xmm15,%xmm0
   DB  102,65,15,56,64,195                 ; pmulld        %xmm11,%xmm0
   DB  102,15,111,232                      ; movdqa        %xmm0,%xmm5
-  DB  102,15,250,45,97,77,1,0             ; psubd         0x14d61(%rip),%xmm5        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,45,77,77,1,0             ; psubd         0x14d4d(%rip),%xmm5        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,69,15,118,192                   ; pcmpeqd       %xmm8,%xmm8
   DB  102,72,15,58,22,233,1               ; pextrq        $0x1,%xmm5,%rcx
   DB  102,72,15,126,237                   ; movq          %xmm5,%rbp
@@ -24706,7 +24689,7 @@
   DB  72,193,233,32                       ; shr           $0x20,%rcx
   DB  243,15,16,36,136                    ; movss         (%rax,%rcx,4),%xmm4
   DB  102,68,15,58,33,220,48              ; insertps      $0x30,%xmm4,%xmm11
-  DB  102,15,111,61,52,68,1,0             ; movdqa        0x14434(%rip),%xmm7        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,111,61,112,68,1,0            ; movdqa        0x14470(%rip),%xmm7        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,15,254,223                      ; paddd         %xmm7,%xmm3
   DB  102,72,15,58,22,217,1               ; pextrq        $0x1,%xmm3,%rcx
   DB  102,72,15,126,221                   ; movq          %xmm3,%rbp
@@ -24792,7 +24775,7 @@
   DB  102,65,15,58,22,201,1               ; pextrd        $0x1,%xmm1,%r9d
   DB  102,65,15,58,22,202,2               ; pextrd        $0x2,%xmm1,%r10d
   DB  102,65,15,58,22,203,3               ; pextrd        $0x3,%xmm1,%r11d
-  DB  102,68,15,111,21,116,66,1,0         ; movdqa        0x14274(%rip),%xmm10        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,68,15,111,21,176,66,1,0         ; movdqa        0x142b0(%rip),%xmm10        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,65,15,254,202                   ; paddd         %xmm10,%xmm1
   DB  102,72,15,58,22,201,1               ; pextrq        $0x1,%xmm1,%rcx
   DB  102,72,15,126,203                   ; movq          %xmm1,%rbx
@@ -24888,7 +24871,7 @@
   DB  102,15,111,124,36,64                ; movdqa        0x40(%rsp),%xmm7
   DB  102,15,111,199                      ; movdqa        %xmm7,%xmm0
   DB  102,15,254,195                      ; paddd         %xmm3,%xmm0
-  DB  102,15,111,21,24,71,1,0             ; movdqa        0x14718(%rip),%xmm2        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,15,111,21,4,71,1,0              ; movdqa        0x14704(%rip),%xmm2        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,15,56,64,194                    ; pmulld        %xmm2,%xmm0
   DB  102,15,111,200                      ; movdqa        %xmm0,%xmm1
   DB  102,69,15,118,192                   ; pcmpeqd       %xmm8,%xmm8
@@ -24909,7 +24892,7 @@
   DB  102,65,15,58,22,193,1               ; pextrd        $0x1,%xmm0,%r9d
   DB  102,65,15,58,22,194,2               ; pextrd        $0x2,%xmm0,%r10d
   DB  102,65,15,58,22,195,3               ; pextrd        $0x3,%xmm0,%r11d
-  DB  102,15,111,37,23,64,1,0             ; movdqa        0x14017(%rip),%xmm4        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,111,37,83,64,1,0             ; movdqa        0x14053(%rip),%xmm4        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,15,254,196                      ; paddd         %xmm4,%xmm0
   DB  102,72,15,58,22,193,1               ; pextrq        $0x1,%xmm0,%rcx
   DB  102,72,15,126,195                   ; movq          %xmm0,%rbx
@@ -24981,7 +24964,7 @@
   DB  102,68,15,111,226                   ; movdqa        %xmm2,%xmm12
   DB  102,65,15,56,64,204                 ; pmulld        %xmm12,%xmm1
   DB  102,15,111,209                      ; movdqa        %xmm1,%xmm2
-  DB  102,15,250,21,67,69,1,0             ; psubd         0x14543(%rip),%xmm2        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,21,47,69,1,0             ; psubd         0x1452f(%rip),%xmm2        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,72,15,58,22,209,1               ; pextrq        $0x1,%xmm2,%rcx
   DB  102,72,15,126,213                   ; movq          %xmm2,%rbp
   DB  137,235                             ; mov           %ebp,%ebx
@@ -25015,7 +24998,7 @@
   DB  102,65,15,111,249                   ; movdqa        %xmm9,%xmm7
   DB  102,69,15,56,64,236                 ; pmulld        %xmm12,%xmm13
   DB  102,65,15,111,205                   ; movdqa        %xmm13,%xmm1
-  DB  102,15,250,13,143,68,1,0            ; psubd         0x1448f(%rip),%xmm1        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,13,123,68,1,0            ; psubd         0x1447b(%rip),%xmm1        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,72,15,58,22,201,1               ; pextrq        $0x1,%xmm1,%rcx
   DB  102,72,15,126,203                   ; movq          %xmm1,%rbx
   DB  137,221                             ; mov           %ebx,%ebp
@@ -25080,7 +25063,7 @@
   DB  102,65,15,111,206                   ; movdqa        %xmm14,%xmm1
   DB  102,15,111,108,36,64                ; movdqa        0x40(%rsp),%xmm5
   DB  102,15,254,205                      ; paddd         %xmm5,%xmm1
-  DB  102,15,111,37,35,67,1,0             ; movdqa        0x14323(%rip),%xmm4        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,15,111,37,15,67,1,0             ; movdqa        0x1430f(%rip),%xmm4        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,15,56,64,204                    ; pmulld        %xmm4,%xmm1
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,15,118,246                      ; pcmpeqd       %xmm6,%xmm6
@@ -25101,7 +25084,7 @@
   DB  102,65,15,58,22,201,1               ; pextrd        $0x1,%xmm1,%r9d
   DB  102,65,15,58,22,202,2               ; pextrd        $0x2,%xmm1,%r10d
   DB  102,65,15,58,22,203,3               ; pextrd        $0x3,%xmm1,%r11d
-  DB  102,15,111,29,36,60,1,0             ; movdqa        0x13c24(%rip),%xmm3        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,111,29,96,60,1,0             ; movdqa        0x13c60(%rip),%xmm3        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,15,254,203                      ; paddd         %xmm3,%xmm1
   DB  102,72,15,58,22,201,1               ; pextrq        $0x1,%xmm1,%rcx
   DB  102,72,15,126,203                   ; movq          %xmm1,%rbx
@@ -25173,7 +25156,7 @@
   DB  102,15,56,64,239                    ; pmulld        %xmm7,%xmm5
   DB  102,15,111,205                      ; movdqa        %xmm5,%xmm1
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
-  DB  102,15,250,5,84,65,1,0              ; psubd         0x14154(%rip),%xmm0        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,5,64,65,1,0              ; psubd         0x14140(%rip),%xmm0        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,72,15,58,22,193,1               ; pextrq        $0x1,%xmm0,%rcx
   DB  102,72,15,126,197                   ; movq          %xmm0,%rbp
   DB  137,235                             ; mov           %ebp,%ebx
@@ -25191,7 +25174,7 @@
   DB  102,65,15,58,22,193,1               ; pextrd        $0x1,%xmm0,%r9d
   DB  102,65,15,58,22,194,2               ; pextrd        $0x2,%xmm0,%r10d
   DB  102,65,15,58,22,195,3               ; pextrd        $0x3,%xmm0,%r11d
-  DB  102,15,254,5,82,58,1,0              ; paddd         0x13a52(%rip),%xmm0        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,254,5,142,58,1,0             ; paddd         0x13a8e(%rip),%xmm0        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,72,15,58,22,193,1               ; pextrq        $0x1,%xmm0,%rcx
   DB  102,72,15,126,195                   ; movq          %xmm0,%rbx
   DB  137,221                             ; mov           %ebx,%ebp
@@ -25207,7 +25190,7 @@
   DB  102,15,254,84,36,48                 ; paddd         0x30(%rsp),%xmm2
   DB  102,15,56,64,215                    ; pmulld        %xmm7,%xmm2
   DB  102,15,111,194                      ; movdqa        %xmm2,%xmm0
-  DB  102,15,250,5,155,64,1,0             ; psubd         0x1409b(%rip),%xmm0        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,5,135,64,1,0             ; psubd         0x14087(%rip),%xmm0        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,72,15,58,22,193,1               ; pextrq        $0x1,%xmm0,%rcx
   DB  102,72,15,126,195                   ; movq          %xmm0,%rbx
   DB  137,221                             ; mov           %ebx,%ebp
@@ -25225,7 +25208,7 @@
   DB  102,65,15,58,22,199,1               ; pextrd        $0x1,%xmm0,%r15d
   DB  102,65,15,58,22,196,2               ; pextrd        $0x2,%xmm0,%r12d
   DB  102,65,15,58,22,198,3               ; pextrd        $0x3,%xmm0,%r14d
-  DB  102,15,254,5,153,57,1,0             ; paddd         0x13999(%rip),%xmm0        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,254,5,213,57,1,0             ; paddd         0x139d5(%rip),%xmm0        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,72,15,58,22,195,1               ; pextrq        $0x1,%xmm0,%rbx
   DB  102,72,15,126,197                   ; movq          %xmm0,%rbp
   DB  137,233                             ; mov           %ebp,%ecx
@@ -25288,7 +25271,7 @@
   DB  15,89,212                           ; mulps         %xmm4,%xmm2
   DB  65,15,88,211                        ; addps         %xmm11,%xmm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,81,56,1,0                  ; movaps        0x13851(%rip),%xmm3        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  15,40,29,141,56,1,0                 ; movaps        0x1388d(%rip),%xmm3        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  72,139,76,36,120                    ; mov           0x78(%rsp),%rcx
   DB  15,40,164,36,192,0,0,0              ; movaps        0xc0(%rsp),%xmm4
   DB  15,40,172,36,208,0,0,0              ; movaps        0xd0(%rsp),%xmm5
@@ -25303,6 +25286,23 @@
   DB  93                                  ; pop           %rbp
   DB  255,224                             ; jmpq          *%rax
 
+PUBLIC _sk_gauss_a_to_rgba_sse41
+_sk_gauss_a_to_rgba_sse41 LABEL PROC
+  DB  15,40,5,222,62,1,0                  ; movaps        0x13ede(%rip),%xmm0        # 2f300 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
+  DB  15,89,195                           ; mulps         %xmm3,%xmm0
+  DB  15,88,5,228,62,1,0                  ; addps         0x13ee4(%rip),%xmm0        # 2f310 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  DB  15,89,195                           ; mulps         %xmm3,%xmm0
+  DB  15,88,5,234,62,1,0                  ; addps         0x13eea(%rip),%xmm0        # 2f320 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  DB  15,89,195                           ; mulps         %xmm3,%xmm0
+  DB  15,88,5,240,62,1,0                  ; addps         0x13ef0(%rip),%xmm0        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  15,89,195                           ; mulps         %xmm3,%xmm0
+  DB  15,88,5,246,62,1,0                  ; addps         0x13ef6(%rip),%xmm0        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  15,40,200                           ; movaps        %xmm0,%xmm1
+  DB  15,40,208                           ; movaps        %xmm0,%xmm2
+  DB  15,40,216                           ; movaps        %xmm0,%xmm3
+  DB  255,224                             ; jmpq          *%rax
+
 PUBLIC _sk_start_pipeline_sse2
 _sk_start_pipeline_sse2 LABEL PROC
   DB  85                                  ; push          %rbp
@@ -31459,23 +31459,6 @@
   DB  65,94                               ; pop           %r14
   DB  255,224                             ; jmpq          *%rax
 
-PUBLIC _sk_gauss_a_to_rgba_sse2
-_sk_gauss_a_to_rgba_sse2 LABEL PROC
-  DB  15,40,5,162,222,0,0                 ; movaps        0xdea2(%rip),%xmm0        # 2f200 <_sk_srcover_bgra_8888_sse2_lowp+0x10b8>
-  DB  15,89,195                           ; mulps         %xmm3,%xmm0
-  DB  15,88,5,168,222,0,0                 ; addps         0xdea8(%rip),%xmm0        # 2f210 <_sk_srcover_bgra_8888_sse2_lowp+0x10c8>
-  DB  15,89,195                           ; mulps         %xmm3,%xmm0
-  DB  15,88,5,174,222,0,0                 ; addps         0xdeae(%rip),%xmm0        # 2f220 <_sk_srcover_bgra_8888_sse2_lowp+0x10d8>
-  DB  15,89,195                           ; mulps         %xmm3,%xmm0
-  DB  15,88,5,180,222,0,0                 ; addps         0xdeb4(%rip),%xmm0        # 2f230 <_sk_srcover_bgra_8888_sse2_lowp+0x10e8>
-  DB  15,89,195                           ; mulps         %xmm3,%xmm0
-  DB  15,88,5,186,222,0,0                 ; addps         0xdeba(%rip),%xmm0        # 2f240 <_sk_srcover_bgra_8888_sse2_lowp+0x10f8>
-  DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,200                           ; movaps        %xmm0,%xmm1
-  DB  15,40,208                           ; movaps        %xmm0,%xmm2
-  DB  15,40,216                           ; movaps        %xmm0,%xmm3
-  DB  255,224                             ; jmpq          *%rax
-
 PUBLIC _sk_gradient_sse2
 _sk_gradient_sse2 LABEL PROC
   DB  65,86                               ; push          %r14
@@ -31484,7 +31467,7 @@
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
   DB  73,131,248,2                        ; cmp           $0x2,%r8
-  DB  114,41                              ; jb            213ce <_sk_gradient_sse2+0x3b>
+  DB  114,41                              ; jb            21392 <_sk_gradient_sse2+0x3b>
   DB  76,139,72,72                        ; mov           0x48(%rax),%r9
   DB  73,255,200                          ; dec           %r8
   DB  73,131,193,4                        ; add           $0x4,%r9
@@ -31495,7 +31478,7 @@
   DB  102,15,250,202                      ; psubd         %xmm2,%xmm1
   DB  73,131,193,4                        ; add           $0x4,%r9
   DB  73,255,200                          ; dec           %r8
-  DB  117,230                             ; jne           213b4 <_sk_gradient_sse2+0x21>
+  DB  117,230                             ; jne           21378 <_sk_gradient_sse2+0x21>
   DB  102,15,112,209,78                   ; pshufd        $0x4e,%xmm1,%xmm2
   DB  102,73,15,126,209                   ; movq          %xmm2,%r9
   DB  69,137,200                          ; mov           %r9d,%r8d
@@ -31629,29 +31612,29 @@
   DB  69,15,94,220                        ; divps         %xmm12,%xmm11
   DB  69,15,40,227                        ; movaps        %xmm11,%xmm12
   DB  69,15,89,228                        ; mulps         %xmm12,%xmm12
-  DB  68,15,40,45,74,220,0,0              ; movaps        0xdc4a(%rip),%xmm13        # 2f250 <_sk_srcover_bgra_8888_sse2_lowp+0x1108>
+  DB  68,15,40,45,54,220,0,0              ; movaps        0xdc36(%rip),%xmm13        # 2f200 <_sk_srcover_bgra_8888_sse2_lowp+0x10b8>
   DB  69,15,89,236                        ; mulps         %xmm12,%xmm13
-  DB  68,15,88,45,78,220,0,0              ; addps         0xdc4e(%rip),%xmm13        # 2f260 <_sk_srcover_bgra_8888_sse2_lowp+0x1118>
+  DB  68,15,88,45,58,220,0,0              ; addps         0xdc3a(%rip),%xmm13        # 2f210 <_sk_srcover_bgra_8888_sse2_lowp+0x10c8>
   DB  69,15,89,236                        ; mulps         %xmm12,%xmm13
-  DB  68,15,88,45,82,220,0,0              ; addps         0xdc52(%rip),%xmm13        # 2f270 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
+  DB  68,15,88,45,62,220,0,0              ; addps         0xdc3e(%rip),%xmm13        # 2f220 <_sk_srcover_bgra_8888_sse2_lowp+0x10d8>
   DB  69,15,89,236                        ; mulps         %xmm12,%xmm13
-  DB  68,15,88,45,86,220,0,0              ; addps         0xdc56(%rip),%xmm13        # 2f280 <_sk_srcover_bgra_8888_sse2_lowp+0x1138>
+  DB  68,15,88,45,66,220,0,0              ; addps         0xdc42(%rip),%xmm13        # 2f230 <_sk_srcover_bgra_8888_sse2_lowp+0x10e8>
   DB  69,15,89,235                        ; mulps         %xmm11,%xmm13
   DB  69,15,194,202,1                     ; cmpltps       %xmm10,%xmm9
-  DB  68,15,40,21,85,220,0,0              ; movaps        0xdc55(%rip),%xmm10        # 2f290 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  DB  68,15,40,21,65,220,0,0              ; movaps        0xdc41(%rip),%xmm10        # 2f240 <_sk_srcover_bgra_8888_sse2_lowp+0x10f8>
   DB  69,15,92,213                        ; subps         %xmm13,%xmm10
   DB  69,15,84,209                        ; andps         %xmm9,%xmm10
   DB  69,15,85,205                        ; andnps        %xmm13,%xmm9
   DB  69,15,86,202                        ; orps          %xmm10,%xmm9
   DB  68,15,194,192,1                     ; cmpltps       %xmm0,%xmm8
-  DB  68,15,40,21,8,214,0,0               ; movaps        0xd608(%rip),%xmm10        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,40,21,68,214,0,0              ; movaps        0xd644(%rip),%xmm10        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  69,15,92,209                        ; subps         %xmm9,%xmm10
   DB  69,15,84,208                        ; andps         %xmm8,%xmm10
   DB  69,15,85,193                        ; andnps        %xmm9,%xmm8
   DB  69,15,86,194                        ; orps          %xmm10,%xmm8
   DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
   DB  68,15,194,200,1                     ; cmpltps       %xmm0,%xmm9
-  DB  68,15,40,21,247,213,0,0             ; movaps        0xd5f7(%rip),%xmm10        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  68,15,40,21,51,214,0,0              ; movaps        0xd633(%rip),%xmm10        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,92,208                        ; subps         %xmm8,%xmm10
   DB  69,15,84,209                        ; andps         %xmm9,%xmm10
   DB  69,15,85,200                        ; andnps        %xmm8,%xmm9
@@ -31682,7 +31665,7 @@
   DB  243,69,15,89,203                    ; mulss         %xmm11,%xmm9
   DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
   DB  68,15,88,200                        ; addps         %xmm0,%xmm9
-  DB  68,15,89,13,190,219,0,0             ; mulps         0xdbbe(%rip),%xmm9        # 2f2a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
+  DB  68,15,89,13,170,219,0,0             ; mulps         0xdbaa(%rip),%xmm9        # 2f250 <_sk_srcover_bgra_8888_sse2_lowp+0x1108>
   DB  15,89,192                           ; mulps         %xmm0,%xmm0
   DB  68,15,40,225                        ; movaps        %xmm1,%xmm12
   DB  69,15,89,228                        ; mulps         %xmm12,%xmm12
@@ -31690,7 +31673,7 @@
   DB  243,69,15,89,219                    ; mulss         %xmm11,%xmm11
   DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
   DB  69,15,92,227                        ; subps         %xmm11,%xmm12
-  DB  68,15,89,21,169,219,0,0             ; mulps         0xdba9(%rip),%xmm10        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
+  DB  68,15,89,21,149,219,0,0             ; mulps         0xdb95(%rip),%xmm10        # 2f260 <_sk_srcover_bgra_8888_sse2_lowp+0x1118>
   DB  69,15,89,212                        ; mulps         %xmm12,%xmm10
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  15,89,192                           ; mulps         %xmm0,%xmm0
@@ -31699,8 +31682,8 @@
   DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
   DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
   DB  65,15,92,193                        ; subps         %xmm9,%xmm0
-  DB  68,15,87,13,81,218,0,0              ; xorps         0xda51(%rip),%xmm9        # 2f180 <_sk_srcover_bgra_8888_sse2_lowp+0x1038>
-  DB  68,15,89,5,41,213,0,0               ; mulps         0xd529(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,87,13,141,218,0,0             ; xorps         0xda8d(%rip),%xmm9        # 2f180 <_sk_srcover_bgra_8888_sse2_lowp+0x1038>
+  DB  68,15,89,5,101,213,0,0              ; mulps         0xd565(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  69,15,92,202                        ; subps         %xmm10,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
@@ -31719,7 +31702,7 @@
   DB  243,69,15,89,203                    ; mulss         %xmm11,%xmm9
   DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
   DB  68,15,88,200                        ; addps         %xmm0,%xmm9
-  DB  68,15,89,13,32,219,0,0              ; mulps         0xdb20(%rip),%xmm9        # 2f2a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
+  DB  68,15,89,13,12,219,0,0              ; mulps         0xdb0c(%rip),%xmm9        # 2f250 <_sk_srcover_bgra_8888_sse2_lowp+0x1108>
   DB  15,89,192                           ; mulps         %xmm0,%xmm0
   DB  68,15,40,225                        ; movaps        %xmm1,%xmm12
   DB  69,15,89,228                        ; mulps         %xmm12,%xmm12
@@ -31727,7 +31710,7 @@
   DB  243,69,15,89,219                    ; mulss         %xmm11,%xmm11
   DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
   DB  69,15,92,227                        ; subps         %xmm11,%xmm12
-  DB  68,15,89,21,11,219,0,0              ; mulps         0xdb0b(%rip),%xmm10        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
+  DB  68,15,89,21,247,218,0,0             ; mulps         0xdaf7(%rip),%xmm10        # 2f260 <_sk_srcover_bgra_8888_sse2_lowp+0x1118>
   DB  69,15,89,212                        ; mulps         %xmm12,%xmm10
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  15,89,192                           ; mulps         %xmm0,%xmm0
@@ -31736,8 +31719,8 @@
   DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
   DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
   DB  65,15,92,193                        ; subps         %xmm9,%xmm0
-  DB  68,15,87,13,179,217,0,0             ; xorps         0xd9b3(%rip),%xmm9        # 2f180 <_sk_srcover_bgra_8888_sse2_lowp+0x1038>
-  DB  68,15,89,5,139,212,0,0              ; mulps         0xd48b(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,87,13,239,217,0,0             ; xorps         0xd9ef(%rip),%xmm9        # 2f180 <_sk_srcover_bgra_8888_sse2_lowp+0x1038>
+  DB  68,15,89,5,199,212,0,0              ; mulps         0xd4c7(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  69,15,92,202                        ; subps         %xmm10,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
@@ -31753,7 +31736,7 @@
   DB  243,69,15,89,200                    ; mulss         %xmm8,%xmm9
   DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
   DB  68,15,88,200                        ; addps         %xmm0,%xmm9
-  DB  68,15,89,13,147,218,0,0             ; mulps         0xda93(%rip),%xmm9        # 2f2a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
+  DB  68,15,89,13,127,218,0,0             ; mulps         0xda7f(%rip),%xmm9        # 2f250 <_sk_srcover_bgra_8888_sse2_lowp+0x1108>
   DB  15,89,192                           ; mulps         %xmm0,%xmm0
   DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
   DB  69,15,89,210                        ; mulps         %xmm10,%xmm10
@@ -31761,7 +31744,7 @@
   DB  243,69,15,89,192                    ; mulss         %xmm8,%xmm8
   DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
   DB  65,15,92,192                        ; subps         %xmm8,%xmm0
-  DB  15,87,5,79,217,0,0                  ; xorps         0xd94f(%rip),%xmm0        # 2f180 <_sk_srcover_bgra_8888_sse2_lowp+0x1038>
+  DB  15,87,5,139,217,0,0                 ; xorps         0xd98b(%rip),%xmm0        # 2f180 <_sk_srcover_bgra_8888_sse2_lowp+0x1038>
   DB  65,15,94,193                        ; divps         %xmm9,%xmm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -31801,7 +31784,7 @@
 PUBLIC _sk_save_xy_sse2
 _sk_save_xy_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,182,211,0,0              ; movaps        0xd3b6(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,40,5,242,211,0,0              ; movaps        0xd3f2(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  15,17,0                             ; movups        %xmm0,(%rax)
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,88,200                        ; addps         %xmm8,%xmm9
@@ -31809,7 +31792,7 @@
   DB  69,15,91,210                        ; cvtdq2ps      %xmm10,%xmm10
   DB  69,15,40,217                        ; movaps        %xmm9,%xmm11
   DB  69,15,194,218,1                     ; cmpltps       %xmm10,%xmm11
-  DB  68,15,40,37,161,211,0,0             ; movaps        0xd3a1(%rip),%xmm12        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  68,15,40,37,221,211,0,0             ; movaps        0xd3dd(%rip),%xmm12        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,84,220                        ; andps         %xmm12,%xmm11
   DB  69,15,92,211                        ; subps         %xmm11,%xmm10
   DB  69,15,92,202                        ; subps         %xmm10,%xmm9
@@ -31852,8 +31835,8 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,128,128,0,0,0              ; movups        0x80(%rax),%xmm8
-  DB  15,88,5,81,217,0,0                  ; addps         0xd951(%rip),%xmm0        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
-  DB  68,15,40,13,249,210,0,0             ; movaps        0xd2f9(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  15,88,5,61,217,0,0                  ; addps         0xd93d(%rip),%xmm0        # 2f270 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
+  DB  68,15,40,13,53,211,0,0              ; movaps        0xd335(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  68,15,17,136,0,1,0,0                ; movups        %xmm9,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -31864,7 +31847,7 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,128,128,0,0,0              ; movups        0x80(%rax),%xmm8
-  DB  15,88,5,197,210,0,0                 ; addps         0xd2c5(%rip),%xmm0        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  15,88,5,1,211,0,0                   ; addps         0xd301(%rip),%xmm0        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  68,15,17,128,0,1,0,0                ; movups        %xmm8,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -31874,8 +31857,8 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,64                         ; movups        0x40(%rax),%xmm1
   DB  68,15,16,128,192,0,0,0              ; movups        0xc0(%rax),%xmm8
-  DB  15,88,13,4,217,0,0                  ; addps         0xd904(%rip),%xmm1        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
-  DB  68,15,40,13,172,210,0,0             ; movaps        0xd2ac(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  15,88,13,240,216,0,0                ; addps         0xd8f0(%rip),%xmm1        # 2f270 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
+  DB  68,15,40,13,232,210,0,0             ; movaps        0xd2e8(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  68,15,17,136,64,1,0,0               ; movups        %xmm9,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -31886,7 +31869,7 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,64                         ; movups        0x40(%rax),%xmm1
   DB  68,15,16,128,192,0,0,0              ; movups        0xc0(%rax),%xmm8
-  DB  15,88,13,119,210,0,0                ; addps         0xd277(%rip),%xmm1        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  15,88,13,179,210,0,0                ; addps         0xd2b3(%rip),%xmm1        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  68,15,17,128,64,1,0,0               ; movups        %xmm8,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -31896,13 +31879,13 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,128,128,0,0,0              ; movups        0x80(%rax),%xmm8
-  DB  15,88,5,199,216,0,0                 ; addps         0xd8c7(%rip),%xmm0        # 2f2d0 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
-  DB  68,15,40,13,95,210,0,0              ; movaps        0xd25f(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  15,88,5,179,216,0,0                 ; addps         0xd8b3(%rip),%xmm0        # 2f280 <_sk_srcover_bgra_8888_sse2_lowp+0x1138>
+  DB  68,15,40,13,155,210,0,0             ; movaps        0xd29b(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
   DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
-  DB  68,15,89,13,187,216,0,0             ; mulps         0xd8bb(%rip),%xmm9        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  DB  68,15,88,13,67,212,0,0              ; addps         0xd443(%rip),%xmm9        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
+  DB  68,15,89,13,167,216,0,0             ; mulps         0xd8a7(%rip),%xmm9        # 2f290 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  DB  68,15,88,13,127,212,0,0             ; addps         0xd47f(%rip),%xmm9        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  68,15,17,136,0,1,0,0                ; movups        %xmm9,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -31913,16 +31896,16 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,128,128,0,0,0              ; movups        0x80(%rax),%xmm8
-  DB  15,88,5,111,216,0,0                 ; addps         0xd86f(%rip),%xmm0        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
-  DB  68,15,40,13,23,210,0,0              ; movaps        0xd217(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  15,88,5,91,216,0,0                  ; addps         0xd85b(%rip),%xmm0        # 2f270 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
+  DB  68,15,40,13,83,210,0,0              ; movaps        0xd253(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
-  DB  68,15,40,5,139,216,0,0              ; movaps        0xd88b(%rip),%xmm8        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
+  DB  68,15,40,5,119,216,0,0              ; movaps        0xd877(%rip),%xmm8        # 2f2a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,143,216,0,0              ; addps         0xd88f(%rip),%xmm8        # 2f300 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
+  DB  68,15,88,5,123,216,0,0              ; addps         0xd87b(%rip),%xmm8        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,227,209,0,0              ; addps         0xd1e3(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,88,5,31,210,0,0               ; addps         0xd21f(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,135,216,0,0              ; addps         0xd887(%rip),%xmm8        # 2f310 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  DB  68,15,88,5,115,216,0,0              ; addps         0xd873(%rip),%xmm8        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   DB  68,15,17,128,0,1,0,0                ; movups        %xmm8,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -31930,17 +31913,17 @@
 PUBLIC _sk_bicubic_p1x_sse2
 _sk_bicubic_p1x_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,193,209,0,0              ; movaps        0xd1c1(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,40,5,253,209,0,0              ; movaps        0xd1fd(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,136,128,0,0,0              ; movups        0x80(%rax),%xmm9
   DB  65,15,88,192                        ; addps         %xmm8,%xmm0
-  DB  68,15,40,21,58,216,0,0              ; movaps        0xd83a(%rip),%xmm10        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
+  DB  68,15,40,21,38,216,0,0              ; movaps        0xd826(%rip),%xmm10        # 2f2a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,62,216,0,0              ; addps         0xd83e(%rip),%xmm10        # 2f300 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
+  DB  68,15,88,21,42,216,0,0              ; addps         0xd82a(%rip),%xmm10        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
   DB  69,15,88,208                        ; addps         %xmm8,%xmm10
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,58,216,0,0              ; addps         0xd83a(%rip),%xmm10        # 2f310 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  DB  68,15,88,21,38,216,0,0              ; addps         0xd826(%rip),%xmm10        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   DB  68,15,17,144,0,1,0,0                ; movups        %xmm10,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -31950,11 +31933,11 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,128,128,0,0,0              ; movups        0x80(%rax),%xmm8
-  DB  15,88,5,10,216,0,0                  ; addps         0xd80a(%rip),%xmm0        # 2f300 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
+  DB  15,88,5,246,215,0,0                 ; addps         0xd7f6(%rip),%xmm0        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  69,15,89,201                        ; mulps         %xmm9,%xmm9
-  DB  68,15,89,5,218,215,0,0              ; mulps         0xd7da(%rip),%xmm8        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  DB  68,15,88,5,98,211,0,0               ; addps         0xd362(%rip),%xmm8        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
+  DB  68,15,89,5,198,215,0,0              ; mulps         0xd7c6(%rip),%xmm8        # 2f290 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  DB  68,15,88,5,158,211,0,0              ; addps         0xd39e(%rip),%xmm8        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  68,15,17,128,0,1,0,0                ; movups        %xmm8,0x100(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -31965,13 +31948,13 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,64                         ; movups        0x40(%rax),%xmm1
   DB  68,15,16,128,192,0,0,0              ; movups        0xc0(%rax),%xmm8
-  DB  15,88,13,157,215,0,0                ; addps         0xd79d(%rip),%xmm1        # 2f2d0 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
-  DB  68,15,40,13,53,209,0,0              ; movaps        0xd135(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  15,88,13,137,215,0,0                ; addps         0xd789(%rip),%xmm1        # 2f280 <_sk_srcover_bgra_8888_sse2_lowp+0x1138>
+  DB  68,15,40,13,113,209,0,0             ; movaps        0xd171(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
   DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
-  DB  68,15,89,13,145,215,0,0             ; mulps         0xd791(%rip),%xmm9        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  DB  68,15,88,13,25,211,0,0              ; addps         0xd319(%rip),%xmm9        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
+  DB  68,15,89,13,125,215,0,0             ; mulps         0xd77d(%rip),%xmm9        # 2f290 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  DB  68,15,88,13,85,211,0,0              ; addps         0xd355(%rip),%xmm9        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  68,15,17,136,64,1,0,0               ; movups        %xmm9,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -31982,16 +31965,16 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,64                         ; movups        0x40(%rax),%xmm1
   DB  68,15,16,128,192,0,0,0              ; movups        0xc0(%rax),%xmm8
-  DB  15,88,13,68,215,0,0                 ; addps         0xd744(%rip),%xmm1        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
-  DB  68,15,40,13,236,208,0,0             ; movaps        0xd0ec(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  15,88,13,48,215,0,0                 ; addps         0xd730(%rip),%xmm1        # 2f270 <_sk_srcover_bgra_8888_sse2_lowp+0x1128>
+  DB  68,15,40,13,40,209,0,0              ; movaps        0xd128(%rip),%xmm9        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
-  DB  68,15,40,5,96,215,0,0               ; movaps        0xd760(%rip),%xmm8        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
+  DB  68,15,40,5,76,215,0,0               ; movaps        0xd74c(%rip),%xmm8        # 2f2a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,100,215,0,0              ; addps         0xd764(%rip),%xmm8        # 2f300 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
+  DB  68,15,88,5,80,215,0,0               ; addps         0xd750(%rip),%xmm8        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,184,208,0,0              ; addps         0xd0b8(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,88,5,244,208,0,0              ; addps         0xd0f4(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,92,215,0,0               ; addps         0xd75c(%rip),%xmm8        # 2f310 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  DB  68,15,88,5,72,215,0,0               ; addps         0xd748(%rip),%xmm8        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   DB  68,15,17,128,64,1,0,0               ; movups        %xmm8,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -31999,17 +31982,17 @@
 PUBLIC _sk_bicubic_p1y_sse2
 _sk_bicubic_p1y_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,150,208,0,0              ; movaps        0xd096(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
+  DB  68,15,40,5,210,208,0,0              ; movaps        0xd0d2(%rip),%xmm8        # 2ec60 <_sk_srcover_bgra_8888_sse2_lowp+0xb18>
   DB  15,16,72,64                         ; movups        0x40(%rax),%xmm1
   DB  68,15,16,136,192,0,0,0              ; movups        0xc0(%rax),%xmm9
   DB  65,15,88,200                        ; addps         %xmm8,%xmm1
-  DB  68,15,40,21,14,215,0,0              ; movaps        0xd70e(%rip),%xmm10        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
+  DB  68,15,40,21,250,214,0,0             ; movaps        0xd6fa(%rip),%xmm10        # 2f2a0 <_sk_srcover_bgra_8888_sse2_lowp+0x1158>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,18,215,0,0              ; addps         0xd712(%rip),%xmm10        # 2f300 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
+  DB  68,15,88,21,254,214,0,0             ; addps         0xd6fe(%rip),%xmm10        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
   DB  69,15,88,208                        ; addps         %xmm8,%xmm10
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,14,215,0,0              ; addps         0xd70e(%rip),%xmm10        # 2f310 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  DB  68,15,88,21,250,214,0,0             ; addps         0xd6fa(%rip),%xmm10        # 2f2c0 <_sk_srcover_bgra_8888_sse2_lowp+0x1178>
   DB  68,15,17,144,64,1,0,0               ; movups        %xmm10,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -32019,11 +32002,11 @@
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,64                         ; movups        0x40(%rax),%xmm1
   DB  68,15,16,128,192,0,0,0              ; movups        0xc0(%rax),%xmm8
-  DB  15,88,13,221,214,0,0                ; addps         0xd6dd(%rip),%xmm1        # 2f300 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
+  DB  15,88,13,201,214,0,0                ; addps         0xd6c9(%rip),%xmm1        # 2f2b0 <_sk_srcover_bgra_8888_sse2_lowp+0x1168>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  69,15,89,201                        ; mulps         %xmm9,%xmm9
-  DB  68,15,89,5,173,214,0,0              ; mulps         0xd6ad(%rip),%xmm8        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
-  DB  68,15,88,5,53,210,0,0               ; addps         0xd235(%rip),%xmm8        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
+  DB  68,15,89,5,153,214,0,0              ; mulps         0xd699(%rip),%xmm8        # 2f290 <_sk_srcover_bgra_8888_sse2_lowp+0x1148>
+  DB  68,15,88,5,113,210,0,0              ; addps         0xd271(%rip),%xmm8        # 2ee70 <_sk_srcover_bgra_8888_sse2_lowp+0xd28>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  68,15,17,128,64,1,0,0               ; movups        %xmm8,0x140(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -32152,7 +32135,7 @@
   DB  102,65,15,254,205                   ; paddd         %xmm13,%xmm1
   DB  102,68,15,127,108,36,80             ; movdqa        %xmm13,0x50(%rsp)
   DB  102,15,254,217                      ; paddd         %xmm1,%xmm3
-  DB  102,68,15,111,5,10,213,0,0          ; movdqa        0xd50a(%rip),%xmm8        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,68,15,111,5,246,212,0,0         ; movdqa        0xd4f6(%rip),%xmm8        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,15,112,195,245                  ; pshufd        $0xf5,%xmm3,%xmm0
   DB  102,65,15,244,216                   ; pmuludq       %xmm8,%xmm3
   DB  102,65,15,244,192                   ; pmuludq       %xmm8,%xmm0
@@ -32191,7 +32174,7 @@
   DB  243,66,15,16,44,139                 ; movss         (%rbx,%r9,4),%xmm5
   DB  15,20,236                           ; unpcklps      %xmm4,%xmm5
   DB  102,15,20,221                       ; unpcklpd      %xmm5,%xmm3
-  DB  102,68,15,111,37,193,205,0,0        ; movdqa        0xcdc1(%rip),%xmm12        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,68,15,111,37,253,205,0,0        ; movdqa        0xcdfd(%rip),%xmm12        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,65,15,254,196                   ; paddd         %xmm12,%xmm0
   DB  102,15,112,224,78                   ; pshufd        $0x4e,%xmm0,%xmm4
   DB  102,73,15,126,224                   ; movq          %xmm4,%r8
@@ -32207,7 +32190,7 @@
   DB  243,66,15,16,36,139                 ; movss         (%rbx,%r9,4),%xmm4
   DB  15,20,224                           ; unpcklps      %xmm0,%xmm4
   DB  102,15,20,236                       ; unpcklpd      %xmm4,%xmm5
-  DB  15,40,37,246,211,0,0                ; movaps        0xd3f6(%rip),%xmm4        # 2f320 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  DB  15,40,37,226,211,0,0                ; movaps        0xd3e2(%rip),%xmm4        # 2f2d0 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   DB  68,15,88,220                        ; addps         %xmm4,%xmm11
   DB  68,15,41,92,36,64                   ; movaps        %xmm11,0x40(%rsp)
   DB  68,15,40,223                        ; movaps        %xmm7,%xmm11
@@ -32243,7 +32226,7 @@
   DB  15,20,249                           ; unpcklps      %xmm1,%xmm7
   DB  102,15,20,252                       ; unpcklpd      %xmm4,%xmm7
   DB  102,15,111,202                      ; movdqa        %xmm2,%xmm1
-  DB  102,15,250,13,107,211,0,0           ; psubd         0xd36b(%rip),%xmm1        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,13,87,211,0,0            ; psubd         0xd357(%rip),%xmm1        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,15,112,225,78                   ; pshufd        $0x4e,%xmm1,%xmm4
   DB  102,73,15,126,224                   ; movq          %xmm4,%r8
   DB  102,72,15,126,200                   ; movq          %xmm1,%rax
@@ -32333,7 +32316,7 @@
   DB  243,15,16,44,131                    ; movss         (%rbx,%rax,4),%xmm5
   DB  15,20,233                           ; unpcklps      %xmm1,%xmm5
   DB  102,15,20,221                       ; unpcklpd      %xmm5,%xmm3
-  DB  102,68,15,111,45,51,203,0,0         ; movdqa        0xcb33(%rip),%xmm13        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,68,15,111,45,111,203,0,0        ; movdqa        0xcb6f(%rip),%xmm13        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,65,15,254,197                   ; paddd         %xmm13,%xmm0
   DB  102,15,112,200,78                   ; pshufd        $0x4e,%xmm0,%xmm1
   DB  102,73,15,126,200                   ; movq          %xmm1,%r8
@@ -32430,7 +32413,7 @@
   DB  102,15,111,206                      ; movdqa        %xmm6,%xmm1
   DB  102,65,15,254,202                   ; paddd         %xmm10,%xmm1
   DB  102,15,112,209,245                  ; pshufd        $0xf5,%xmm1,%xmm2
-  DB  102,15,111,29,8,208,0,0             ; movdqa        0xd008(%rip),%xmm3        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,15,111,29,244,207,0,0           ; movdqa        0xcff4(%rip),%xmm3        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,15,244,203                      ; pmuludq       %xmm3,%xmm1
   DB  102,15,244,211                      ; pmuludq       %xmm3,%xmm2
   DB  102,15,111,251                      ; movdqa        %xmm3,%xmm7
@@ -32468,7 +32451,7 @@
   DB  243,15,16,44,131                    ; movss         (%rbx,%rax,4),%xmm5
   DB  15,20,236                           ; unpcklps      %xmm4,%xmm5
   DB  102,15,20,213                       ; unpcklpd      %xmm5,%xmm2
-  DB  102,15,111,5,200,200,0,0            ; movdqa        0xc8c8(%rip),%xmm0        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,111,5,4,201,0,0              ; movdqa        0xc904(%rip),%xmm0        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,15,254,216                      ; paddd         %xmm0,%xmm3
   DB  102,15,112,227,78                   ; pshufd        $0x4e,%xmm3,%xmm4
   DB  102,73,15,126,224                   ; movq          %xmm4,%r8
@@ -32549,7 +32532,7 @@
   DB  102,15,111,214                      ; movdqa        %xmm6,%xmm2
   DB  102,65,15,254,212                   ; paddd         %xmm12,%xmm2
   DB  102,15,112,194,245                  ; pshufd        $0xf5,%xmm2,%xmm0
-  DB  102,15,111,13,230,205,0,0           ; movdqa        0xcde6(%rip),%xmm1        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,15,111,13,210,205,0,0           ; movdqa        0xcdd2(%rip),%xmm1        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,15,244,209                      ; pmuludq       %xmm1,%xmm2
   DB  102,15,244,193                      ; pmuludq       %xmm1,%xmm0
   DB  102,15,111,241                      ; movdqa        %xmm1,%xmm6
@@ -32571,7 +32554,7 @@
   DB  68,15,20,209                        ; unpcklps      %xmm1,%xmm10
   DB  102,68,15,20,210                    ; unpcklpd      %xmm2,%xmm10
   DB  102,15,111,200                      ; movdqa        %xmm0,%xmm1
-  DB  102,15,250,13,139,205,0,0           ; psubd         0xcd8b(%rip),%xmm1        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,13,119,205,0,0           ; psubd         0xcd77(%rip),%xmm1        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,15,112,209,78                   ; pshufd        $0x4e,%xmm1,%xmm2
   DB  102,73,15,126,208                   ; movq          %xmm2,%r8
   DB  102,72,15,126,200                   ; movq          %xmm1,%rax
@@ -32586,7 +32569,7 @@
   DB  243,15,16,20,131                    ; movss         (%rbx,%rax,4),%xmm2
   DB  15,20,209                           ; unpcklps      %xmm1,%xmm2
   DB  102,15,20,226                       ; unpcklpd      %xmm2,%xmm4
-  DB  102,15,254,5,166,198,0,0            ; paddd         0xc6a6(%rip),%xmm0        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,254,5,226,198,0,0            ; paddd         0xc6e2(%rip),%xmm0        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,15,112,200,78                   ; pshufd        $0x4e,%xmm0,%xmm1
   DB  102,73,15,126,200                   ; movq          %xmm1,%r8
   DB  102,72,15,126,192                   ; movq          %xmm0,%rax
@@ -32623,7 +32606,7 @@
   DB  15,20,199                           ; unpcklps      %xmm7,%xmm0
   DB  102,15,20,193                       ; unpcklpd      %xmm1,%xmm0
   DB  102,15,111,202                      ; movdqa        %xmm2,%xmm1
-  DB  102,15,250,13,149,204,0,0           ; psubd         0xcc95(%rip),%xmm1        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,13,129,204,0,0           ; psubd         0xcc81(%rip),%xmm1        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,15,112,249,78                   ; pshufd        $0x4e,%xmm1,%xmm7
   DB  102,73,15,126,248                   ; movq          %xmm7,%r8
   DB  102,72,15,126,200                   ; movq          %xmm1,%rax
@@ -32638,7 +32621,7 @@
   DB  243,15,16,52,131                    ; movss         (%rbx,%rax,4),%xmm6
   DB  15,20,247                           ; unpcklps      %xmm7,%xmm6
   DB  102,15,20,206                       ; unpcklpd      %xmm6,%xmm1
-  DB  102,15,254,21,176,197,0,0           ; paddd         0xc5b0(%rip),%xmm2        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,254,21,236,197,0,0           ; paddd         0xc5ec(%rip),%xmm2        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,15,112,242,78                   ; pshufd        $0x4e,%xmm2,%xmm6
   DB  102,73,15,126,240                   ; movq          %xmm6,%r8
   DB  102,72,15,126,208                   ; movq          %xmm2,%rax
@@ -32782,7 +32765,7 @@
   DB  102,65,15,254,210                   ; paddd         %xmm10,%xmm2
   DB  102,15,111,220                      ; movdqa        %xmm4,%xmm3
   DB  102,15,254,218                      ; paddd         %xmm2,%xmm3
-  DB  102,15,111,37,185,201,0,0           ; movdqa        0xc9b9(%rip),%xmm4        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,15,111,37,165,201,0,0           ; movdqa        0xc9a5(%rip),%xmm4        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,15,112,195,245                  ; pshufd        $0xf5,%xmm3,%xmm0
   DB  102,15,244,220                      ; pmuludq       %xmm4,%xmm3
   DB  102,15,244,196                      ; pmuludq       %xmm4,%xmm0
@@ -32821,7 +32804,7 @@
   DB  243,66,15,16,52,139                 ; movss         (%rbx,%r9,4),%xmm6
   DB  15,20,244                           ; unpcklps      %xmm4,%xmm6
   DB  102,15,20,238                       ; unpcklpd      %xmm6,%xmm5
-  DB  102,15,254,5,115,194,0,0            ; paddd         0xc273(%rip),%xmm0        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,254,5,175,194,0,0            ; paddd         0xc2af(%rip),%xmm0        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,15,112,224,78                   ; pshufd        $0x4e,%xmm0,%xmm4
   DB  102,73,15,126,224                   ; movq          %xmm4,%r8
   DB  102,73,15,126,193                   ; movq          %xmm0,%r9
@@ -32836,7 +32819,7 @@
   DB  243,66,15,16,52,139                 ; movss         (%rbx,%r9,4),%xmm6
   DB  15,20,244                           ; unpcklps      %xmm4,%xmm6
   DB  102,15,20,222                       ; unpcklpd      %xmm6,%xmm3
-  DB  15,40,53,173,200,0,0                ; movaps        0xc8ad(%rip),%xmm6        # 2f320 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  DB  15,40,53,153,200,0,0                ; movaps        0xc899(%rip),%xmm6        # 2f2d0 <_sk_srcover_bgra_8888_sse2_lowp+0x1188>
   DB  68,15,88,198                        ; addps         %xmm6,%xmm8
   DB  68,15,41,68,36,48                   ; movaps        %xmm8,0x30(%rsp)
   DB  68,15,88,254                        ; addps         %xmm6,%xmm15
@@ -32854,7 +32837,7 @@
   DB  102,65,15,254,215                   ; paddd         %xmm15,%xmm2
   DB  102,68,15,127,124,36,96             ; movdqa        %xmm15,0x60(%rsp)
   DB  102,15,112,194,245                  ; pshufd        $0xf5,%xmm2,%xmm0
-  DB  102,15,111,13,96,200,0,0            ; movdqa        0xc860(%rip),%xmm1        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,15,111,13,76,200,0,0            ; movdqa        0xc84c(%rip),%xmm1        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,15,244,209                      ; pmuludq       %xmm1,%xmm2
   DB  102,15,244,193                      ; pmuludq       %xmm1,%xmm0
   DB  102,15,112,192,232                  ; pshufd        $0xe8,%xmm0,%xmm0
@@ -32875,7 +32858,7 @@
   DB  15,20,240                           ; unpcklps      %xmm0,%xmm6
   DB  102,15,20,242                       ; unpcklpd      %xmm2,%xmm6
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
-  DB  102,15,250,5,11,200,0,0             ; psubd         0xc80b(%rip),%xmm0        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,5,247,199,0,0            ; psubd         0xc7f7(%rip),%xmm0        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,15,112,208,78                   ; pshufd        $0x4e,%xmm0,%xmm2
   DB  102,73,15,126,208                   ; movq          %xmm2,%r8
   DB  102,72,15,126,192                   ; movq          %xmm0,%rax
@@ -32890,7 +32873,7 @@
   DB  243,68,15,16,28,131                 ; movss         (%rbx,%rax,4),%xmm11
   DB  68,15,20,216                        ; unpcklps      %xmm0,%xmm11
   DB  102,65,15,20,211                    ; unpcklpd      %xmm11,%xmm2
-  DB  102,15,254,13,35,193,0,0            ; paddd         0xc123(%rip),%xmm1        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,254,13,95,193,0,0            ; paddd         0xc15f(%rip),%xmm1        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,15,112,193,78                   ; pshufd        $0x4e,%xmm1,%xmm0
   DB  102,73,15,126,192                   ; movq          %xmm0,%r8
   DB  102,72,15,126,200                   ; movq          %xmm1,%rax
@@ -32928,7 +32911,7 @@
   DB  102,65,15,111,194                   ; movdqa        %xmm10,%xmm0
   DB  102,15,254,68,36,32                 ; paddd         0x20(%rsp),%xmm0
   DB  102,15,112,216,245                  ; pshufd        $0xf5,%xmm0,%xmm3
-  DB  102,15,111,37,7,199,0,0             ; movdqa        0xc707(%rip),%xmm4        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,15,111,37,243,198,0,0           ; movdqa        0xc6f3(%rip),%xmm4        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,15,244,196                      ; pmuludq       %xmm4,%xmm0
   DB  102,15,244,220                      ; pmuludq       %xmm4,%xmm3
   DB  102,68,15,111,220                   ; movdqa        %xmm4,%xmm11
@@ -32966,7 +32949,7 @@
   DB  243,15,16,60,131                    ; movss         (%rbx,%rax,4),%xmm7
   DB  15,20,253                           ; unpcklps      %xmm5,%xmm7
   DB  102,15,20,199                       ; unpcklpd      %xmm7,%xmm0
-  DB  102,68,15,111,5,195,191,0,0         ; movdqa        0xbfc3(%rip),%xmm8        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,68,15,111,5,255,191,0,0         ; movdqa        0xbfff(%rip),%xmm8        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,65,15,254,224                   ; paddd         %xmm8,%xmm4
   DB  102,15,112,236,78                   ; pshufd        $0x4e,%xmm4,%xmm5
   DB  102,73,15,126,232                   ; movq          %xmm5,%r8
@@ -33112,7 +33095,7 @@
   DB  243,15,16,36,131                    ; movss         (%rbx,%rax,4),%xmm4
   DB  15,20,226                           ; unpcklps      %xmm2,%xmm4
   DB  102,15,20,252                       ; unpcklpd      %xmm4,%xmm7
-  DB  102,68,15,111,61,18,189,0,0         ; movdqa        0xbd12(%rip),%xmm15        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,68,15,111,61,78,189,0,0         ; movdqa        0xbd4e(%rip),%xmm15        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,65,15,254,207                   ; paddd         %xmm15,%xmm1
   DB  102,15,112,209,78                   ; pshufd        $0x4e,%xmm1,%xmm2
   DB  102,73,15,126,208                   ; movq          %xmm2,%r8
@@ -33195,7 +33178,7 @@
   DB  102,65,15,111,217                   ; movdqa        %xmm9,%xmm3
   DB  102,65,15,254,216                   ; paddd         %xmm8,%xmm3
   DB  102,15,112,211,245                  ; pshufd        $0xf5,%xmm3,%xmm2
-  DB  102,15,111,45,43,194,0,0            ; movdqa        0xc22b(%rip),%xmm5        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,15,111,45,23,194,0,0            ; movdqa        0xc217(%rip),%xmm5        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,15,244,221                      ; pmuludq       %xmm5,%xmm3
   DB  102,15,244,213                      ; pmuludq       %xmm5,%xmm2
   DB  102,15,112,234,232                  ; pshufd        $0xe8,%xmm2,%xmm5
@@ -33216,7 +33199,7 @@
   DB  68,15,20,198                        ; unpcklps      %xmm6,%xmm8
   DB  102,68,15,20,197                    ; unpcklpd      %xmm5,%xmm8
   DB  102,15,111,234                      ; movdqa        %xmm2,%xmm5
-  DB  102,15,250,45,212,193,0,0           ; psubd         0xc1d4(%rip),%xmm5        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,45,192,193,0,0           ; psubd         0xc1c0(%rip),%xmm5        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,15,112,245,78                   ; pshufd        $0x4e,%xmm5,%xmm6
   DB  102,73,15,126,240                   ; movq          %xmm6,%r8
   DB  102,72,15,126,232                   ; movq          %xmm5,%rax
@@ -33248,7 +33231,7 @@
   DB  102,15,20,214                       ; unpcklpd      %xmm6,%xmm2
   DB  102,69,15,254,205                   ; paddd         %xmm13,%xmm9
   DB  102,65,15,112,233,245               ; pshufd        $0xf5,%xmm9,%xmm5
-  DB  102,15,111,29,50,193,0,0            ; movdqa        0xc132(%rip),%xmm3        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,15,111,29,30,193,0,0            ; movdqa        0xc11e(%rip),%xmm3        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,68,15,244,203                   ; pmuludq       %xmm3,%xmm9
   DB  102,15,244,235                      ; pmuludq       %xmm3,%xmm5
   DB  102,15,112,237,232                  ; pshufd        $0xe8,%xmm5,%xmm5
@@ -33269,7 +33252,7 @@
   DB  69,15,20,233                        ; unpcklps      %xmm9,%xmm13
   DB  102,68,15,20,237                    ; unpcklpd      %xmm5,%xmm13
   DB  102,65,15,111,239                   ; movdqa        %xmm15,%xmm5
-  DB  102,15,250,45,210,192,0,0           ; psubd         0xc0d2(%rip),%xmm5        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,45,190,192,0,0           ; psubd         0xc0be(%rip),%xmm5        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,15,112,221,78                   ; pshufd        $0x4e,%xmm5,%xmm3
   DB  102,73,15,126,216                   ; movq          %xmm3,%r8
   DB  102,72,15,126,232                   ; movq          %xmm5,%rax
@@ -33284,7 +33267,7 @@
   DB  243,15,16,44,131                    ; movss         (%rbx,%rax,4),%xmm5
   DB  15,20,235                           ; unpcklps      %xmm3,%xmm5
   DB  102,15,20,245                       ; unpcklpd      %xmm5,%xmm6
-  DB  102,68,15,111,13,236,185,0,0        ; movdqa        0xb9ec(%rip),%xmm9        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,68,15,111,13,40,186,0,0         ; movdqa        0xba28(%rip),%xmm9        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,69,15,254,249                   ; paddd         %xmm9,%xmm15
   DB  102,65,15,112,223,78                ; pshufd        $0x4e,%xmm15,%xmm3
   DB  102,73,15,126,216                   ; movq          %xmm3,%r8
@@ -33346,7 +33329,7 @@
   DB  102,65,15,111,195                   ; movdqa        %xmm11,%xmm0
   DB  102,15,254,195                      ; paddd         %xmm3,%xmm0
   DB  102,15,112,200,245                  ; pshufd        $0xf5,%xmm0,%xmm1
-  DB  102,15,111,21,102,191,0,0           ; movdqa        0xbf66(%rip),%xmm2        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,15,111,21,82,191,0,0            ; movdqa        0xbf52(%rip),%xmm2        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,15,244,194                      ; pmuludq       %xmm2,%xmm0
   DB  102,15,244,202                      ; pmuludq       %xmm2,%xmm1
   DB  102,68,15,111,234                   ; movdqa        %xmm2,%xmm13
@@ -33539,7 +33522,7 @@
   DB  68,15,20,238                        ; unpcklps      %xmm6,%xmm13
   DB  102,68,15,20,234                    ; unpcklpd      %xmm2,%xmm13
   DB  102,15,111,212                      ; movdqa        %xmm4,%xmm2
-  DB  102,15,250,21,255,187,0,0           ; psubd         0xbbff(%rip),%xmm2        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,21,235,187,0,0           ; psubd         0xbbeb(%rip),%xmm2        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,15,112,242,78                   ; pshufd        $0x4e,%xmm2,%xmm6
   DB  102,73,15,126,240                   ; movq          %xmm6,%r8
   DB  102,72,15,126,208                   ; movq          %xmm2,%rax
@@ -33599,7 +33582,7 @@
   DB  102,68,15,111,116,36,32             ; movdqa        0x20(%rsp),%xmm14
   DB  102,65,15,254,198                   ; paddd         %xmm14,%xmm0
   DB  102,15,112,200,245                  ; pshufd        $0xf5,%xmm0,%xmm1
-  DB  102,68,15,111,5,220,186,0,0         ; movdqa        0xbadc(%rip),%xmm8        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,68,15,111,5,200,186,0,0         ; movdqa        0xbac8(%rip),%xmm8        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,65,15,244,192                   ; pmuludq       %xmm8,%xmm0
   DB  102,65,15,244,200                   ; pmuludq       %xmm8,%xmm1
   DB  102,65,15,111,240                   ; movdqa        %xmm8,%xmm6
@@ -33637,7 +33620,7 @@
   DB  243,15,16,44,131                    ; movss         (%rbx,%rax,4),%xmm5
   DB  15,20,235                           ; unpcklps      %xmm3,%xmm5
   DB  102,15,20,205                       ; unpcklpd      %xmm5,%xmm1
-  DB  102,68,15,111,5,154,179,0,0         ; movdqa        0xb39a(%rip),%xmm8        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,68,15,111,5,214,179,0,0         ; movdqa        0xb3d6(%rip),%xmm8        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,65,15,254,208                   ; paddd         %xmm8,%xmm2
   DB  102,15,112,218,78                   ; pshufd        $0x4e,%xmm2,%xmm3
   DB  102,73,15,126,216                   ; movq          %xmm3,%r8
@@ -33720,7 +33703,7 @@
   DB  102,15,254,208                      ; paddd         %xmm0,%xmm2
   DB  102,15,111,240                      ; movdqa        %xmm0,%xmm6
   DB  102,15,112,194,245                  ; pshufd        $0xf5,%xmm2,%xmm0
-  DB  102,15,111,37,180,184,0,0           ; movdqa        0xb8b4(%rip),%xmm4        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  102,15,111,37,160,184,0,0           ; movdqa        0xb8a0(%rip),%xmm4        # 2f2e0 <_sk_srcover_bgra_8888_sse2_lowp+0x1198>
   DB  102,15,244,212                      ; pmuludq       %xmm4,%xmm2
   DB  102,15,244,196                      ; pmuludq       %xmm4,%xmm0
   DB  102,15,112,200,232                  ; pshufd        $0xe8,%xmm0,%xmm1
@@ -33741,7 +33724,7 @@
   DB  68,15,20,193                        ; unpcklps      %xmm1,%xmm8
   DB  102,68,15,20,194                    ; unpcklpd      %xmm2,%xmm8
   DB  102,15,111,200                      ; movdqa        %xmm0,%xmm1
-  DB  102,15,250,13,93,184,0,0            ; psubd         0xb85d(%rip),%xmm1        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,13,73,184,0,0            ; psubd         0xb849(%rip),%xmm1        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,15,112,209,78                   ; pshufd        $0x4e,%xmm1,%xmm2
   DB  102,73,15,126,208                   ; movq          %xmm2,%r8
   DB  102,72,15,126,200                   ; movq          %xmm1,%rax
@@ -33756,7 +33739,7 @@
   DB  243,15,16,20,131                    ; movss         (%rbx,%rax,4),%xmm2
   DB  15,20,209                           ; unpcklps      %xmm1,%xmm2
   DB  102,68,15,20,242                    ; unpcklpd      %xmm2,%xmm14
-  DB  102,15,254,5,118,177,0,0            ; paddd         0xb176(%rip),%xmm0        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,254,5,178,177,0,0            ; paddd         0xb1b2(%rip),%xmm0        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,15,112,200,78                   ; pshufd        $0x4e,%xmm0,%xmm1
   DB  102,73,15,126,200                   ; movq          %xmm1,%r8
   DB  102,72,15,126,192                   ; movq          %xmm0,%rax
@@ -33794,7 +33777,7 @@
   DB  15,20,198                           ; unpcklps      %xmm6,%xmm0
   DB  102,15,20,193                       ; unpcklpd      %xmm1,%xmm0
   DB  102,15,111,202                      ; movdqa        %xmm2,%xmm1
-  DB  102,15,250,13,101,183,0,0           ; psubd         0xb765(%rip),%xmm1        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  102,15,250,13,81,183,0,0            ; psubd         0xb751(%rip),%xmm1        # 2f2f0 <_sk_srcover_bgra_8888_sse2_lowp+0x11a8>
   DB  102,15,112,241,78                   ; pshufd        $0x4e,%xmm1,%xmm6
   DB  102,73,15,126,240                   ; movq          %xmm6,%r8
   DB  102,72,15,126,200                   ; movq          %xmm1,%rax
@@ -33809,7 +33792,7 @@
   DB  243,15,16,36,131                    ; movss         (%rbx,%rax,4),%xmm4
   DB  15,20,230                           ; unpcklps      %xmm6,%xmm4
   DB  102,15,20,204                       ; unpcklpd      %xmm4,%xmm1
-  DB  102,15,254,21,128,176,0,0           ; paddd         0xb080(%rip),%xmm2        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
+  DB  102,15,254,21,188,176,0,0           ; paddd         0xb0bc(%rip),%xmm2        # 2eca0 <_sk_srcover_bgra_8888_sse2_lowp+0xb58>
   DB  102,15,112,226,78                   ; pshufd        $0x4e,%xmm2,%xmm4
   DB  102,73,15,126,224                   ; movq          %xmm4,%r8
   DB  102,72,15,126,208                   ; movq          %xmm2,%rax
@@ -33868,7 +33851,7 @@
   DB  15,89,212                           ; mulps         %xmm4,%xmm2
   DB  65,15,88,215                        ; addps         %xmm15,%xmm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,102,175,0,0                ; movaps        0xaf66(%rip),%xmm3        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
+  DB  15,40,29,162,175,0,0                ; movaps        0xafa2(%rip),%xmm3        # 2ec70 <_sk_srcover_bgra_8888_sse2_lowp+0xb28>
   DB  15,40,164,36,224,0,0,0              ; movaps        0xe0(%rsp),%xmm4
   DB  15,40,172,36,240,0,0,0              ; movaps        0xf0(%rsp),%xmm5
   DB  15,40,180,36,0,1,0,0                ; movaps        0x100(%rsp),%xmm6
@@ -33877,6 +33860,23 @@
   DB  91                                  ; pop           %rbx
   DB  255,224                             ; jmpq          *%rax
 
+PUBLIC _sk_gauss_a_to_rgba_sse2
+_sk_gauss_a_to_rgba_sse2 LABEL PROC
+  DB  15,40,5,1,182,0,0                   ; movaps        0xb601(%rip),%xmm0        # 2f300 <_sk_srcover_bgra_8888_sse2_lowp+0x11b8>
+  DB  15,89,195                           ; mulps         %xmm3,%xmm0
+  DB  15,88,5,7,182,0,0                   ; addps         0xb607(%rip),%xmm0        # 2f310 <_sk_srcover_bgra_8888_sse2_lowp+0x11c8>
+  DB  15,89,195                           ; mulps         %xmm3,%xmm0
+  DB  15,88,5,13,182,0,0                  ; addps         0xb60d(%rip),%xmm0        # 2f320 <_sk_srcover_bgra_8888_sse2_lowp+0x11d8>
+  DB  15,89,195                           ; mulps         %xmm3,%xmm0
+  DB  15,88,5,19,182,0,0                  ; addps         0xb613(%rip),%xmm0        # 2f330 <_sk_srcover_bgra_8888_sse2_lowp+0x11e8>
+  DB  15,89,195                           ; mulps         %xmm3,%xmm0
+  DB  15,88,5,25,182,0,0                  ; addps         0xb619(%rip),%xmm0        # 2f340 <_sk_srcover_bgra_8888_sse2_lowp+0x11f8>
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  15,40,200                           ; movaps        %xmm0,%xmm1
+  DB  15,40,208                           ; movaps        %xmm0,%xmm2
+  DB  15,40,216                           ; movaps        %xmm0,%xmm3
+  DB  255,224                             ; jmpq          *%rax
+
 PUBLIC _sk_start_pipeline_hsw_lowp
 _sk_start_pipeline_hsw_lowp LABEL PROC
   DB  85                                  ; push          %rbp
@@ -44645,15 +44645,9 @@
   DB  55                                  ; (bad)
   DB  63                                  ; (bad)
   DB  152                                 ; cwtl
-  DB  221,147,61,18,120,57                ; fstl          0x3978123d(%rbx)
-  DB  64,45,16,17,192,32                  ; rex           sub $0x20c01110,%eax
-  DB  148                                 ; xchg          %eax,%esp
-  DB  90                                  ; pop           %rdx
-  DB  62,4,157                            ; ds            add $0x9d,%al
-  DB  30                                  ; (bad)
-  DB  62,0,24                             ; add           %bl,%ds:(%rax)
-  DB  161,57,111,43,231,187,159,215,202   ; movabs        0xcad79fbbe72b6f39,%eax
-  DB  60,212                              ; cmp           $0xd4,%al
+  DB  221,147,61,111,43,231               ; fstl          -0x18d490c3(%rbx)
+  DB  187,159,215,202,60                  ; mov           $0x3ccad79f,%ebx
+  DB  212                                 ; (bad)
   DB  100,84                              ; fs            push %rsp
   DB  189,169,240,34,62                   ; mov           $0x3e22f0a9,%ebp
   DB  0,0                                 ; add           %al,(%rax)
@@ -44664,20 +44658,25 @@
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,0,0                             ; addb          $0x0,(%rax)
   DB  0,191,0,0,192,191                   ; add           %bh,-0x40400000(%rdi)
-  DB  114,28                              ; jb            2e57e <.literal4+0x18a>
+  DB  114,28                              ; jb            2e56a <.literal4+0x176>
   DB  199                                 ; (bad)
   DB  62,0,0                              ; add           %al,%ds:(%rax)
   DB  192,63,85                           ; sarb          $0x55,(%rdi)
   DB  85                                  ; push          %rbp
   DB  149                                 ; xchg          %eax,%ebp
   DB  191,57,142,99,61                    ; mov           $0x3d638e39,%edi
-  DB  114,249                             ; jb            2e56b <.literal4+0x177>
-  DB  127,63                              ; jg            2e5b3 <_sk_srcover_bgra_8888_sse2_lowp+0x46b>
+  DB  114,249                             ; jb            2e557 <.literal4+0x163>
+  DB  127,63                              ; jg            2e59f <_sk_srcover_bgra_8888_sse2_lowp+0x457>
   DB  3,0                                 ; add           (%rax),%eax
   DB  0,0                                 ; add           %al,(%rax)
-  DB  0,4,0                               ; add           %al,(%rax,%rax,1)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
+  DB  18,120,57                           ; adc           0x39(%rax),%bh
+  DB  64,45,16,17,192,32                  ; rex           sub $0x20c01110,%eax
+  DB  148                                 ; xchg          %eax,%esp
+  DB  90                                  ; pop           %rdx
+  DB  62,4,157                            ; ds            add $0x9d,%al
+  DB  30                                  ; (bad)
+  DB  62,0,24                             ; add           %bl,%ds:(%rax)
+  DB  161,57,0,4,0,0,0,0,0                ; movabs        0x40039,%eax
   DB  56,255                              ; cmp           %bh,%bh
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
@@ -46211,28 +46210,7 @@
   DB  221,147,61,152,221,147              ; fstl          -0x6c2267c3(%rbx)
   DB  61,152,221,147,61                   ; cmp           $0x3d93dd98,%eax
   DB  152                                 ; cwtl
-  DB  221,147,61,45,16,17                 ; fstl          0x11102d3d(%rbx)
-  DB  192,45,16,17,192,45,16              ; shrb          $0x10,0x2dc01110(%rip)        # 2dc3031a <_sk_srcover_bgra_8888_sse2_lowp+0x2dc021d2>
-  DB  17,192                              ; adc           %eax,%eax
-  DB  45,16,17,192,18                     ; sub           $0x12c01110,%eax
-  DB  120,57                              ; js            2f24c <.literal16+0x63c>
-  DB  64,18,120,57                        ; adc           0x39(%rax),%dil
-  DB  64,18,120,57                        ; adc           0x39(%rax),%dil
-  DB  64,18,120,57                        ; adc           0x39(%rax),%dil
-  DB  64,32,148,90,62,32,148,90           ; and           %dl,0x5a94203e(%rdx,%rbx,2)
-  DB  62,32,148,90,62,32,148,90           ; and           %dl,%ds:0x5a94203e(%rdx,%rbx,2)
-  DB  62,4,157                            ; ds            add $0x9d,%al
-  DB  30                                  ; (bad)
-  DB  62,4,157                            ; ds            add $0x9d,%al
-  DB  30                                  ; (bad)
-  DB  62,4,157                            ; ds            add $0x9d,%al
-  DB  30                                  ; (bad)
-  DB  62,4,157                            ; ds            add $0x9d,%al
-  DB  30                                  ; (bad)
-  DB  62,0,24                             ; add           %bl,%ds:(%rax)
-  DB  161,57,0,24,161,57,0,24,161         ; movabs        0xa1180039a1180039,%eax
-  DB  57,0                                ; cmp           %eax,(%rax)
-  DB  24,161,57,111,43,231                ; sbb           %ah,-0x18d490c7(%rcx)
+  DB  221,147,61,111,43,231               ; fstl          -0x18d490c3(%rbx)
   DB  187,111,43,231,187                  ; mov           $0xbbe72b6f,%ebx
   DB  111                                 ; outsl         %ds:(%rsi),(%dx)
   DB  43,231                              ; sub           %edi,%esp
@@ -46280,13 +46258,13 @@
   DB  192,191,0,0,192,191,0               ; sarb          $0x0,-0x40400000(%rdi)
   DB  0,192                               ; add           %al,%al
   DB  191,0,0,192,191                     ; mov           $0xbfc00000,%edi
-  DB  114,28                              ; jb            2f2fe <.literal16+0x6ee>
+  DB  114,28                              ; jb            2f2ae <.literal16+0x69e>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         2f302 <.literal16+0x6f2>
+  DB  62,114,28                           ; jb,pt         2f2b2 <.literal16+0x6a2>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         2f306 <.literal16+0x6f6>
+  DB  62,114,28                           ; jb,pt         2f2b6 <.literal16+0x6a6>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         2f30a <.literal16+0x6fa>
+  DB  62,114,28                           ; jb,pt         2f2ba <.literal16+0x6aa>
   DB  199                                 ; (bad)
   DB  62,85                               ; ds            push %rbp
   DB  85                                  ; push          %rbp
@@ -46305,15 +46283,15 @@
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d668155 <_sk_srcover_bgra_8888_sse2_lowp+0x3d63a00d>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d668105 <_sk_srcover_bgra_8888_sse2_lowp+0x3d639fbd>
   DB  57,142,99,61,114,249                ; cmp           %ecx,-0x68dc29d(%rsi)
-  DB  127,63                              ; jg            2f363 <.literal16+0x753>
-  DB  114,249                             ; jb            2f31f <.literal16+0x70f>
-  DB  127,63                              ; jg            2f367 <.literal16+0x757>
-  DB  114,249                             ; jb            2f323 <.literal16+0x713>
-  DB  127,63                              ; jg            2f36b <.literal16+0x75b>
-  DB  114,249                             ; jb            2f327 <.literal16+0x717>
-  DB  127,63                              ; jg            2f36f <.literal16+0x75f>
+  DB  127,63                              ; jg            2f313 <.literal16+0x703>
+  DB  114,249                             ; jb            2f2cf <.literal16+0x6bf>
+  DB  127,63                              ; jg            2f317 <.literal16+0x707>
+  DB  114,249                             ; jb            2f2d3 <.literal16+0x6c3>
+  DB  127,63                              ; jg            2f31b <.literal16+0x70b>
+  DB  114,249                             ; jb            2f2d7 <.literal16+0x6c7>
+  DB  127,63                              ; jg            2f31f <.literal16+0x70f>
   DB  3,0                                 ; add           (%rax),%eax
   DB  0,0                                 ; add           %al,(%rax)
   DB  3,0                                 ; add           (%rax),%eax
@@ -46337,8 +46315,28 @@
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,0                               ; incl          (%rax)
-  DB  0,255                               ; add           %bh,%bh
+  DB  255,45,16,17,192,45                 ; ljmp          *0x2dc01110(%rip)        # 2dc30415 <_sk_srcover_bgra_8888_sse2_lowp+0x2dc022cd>
+  DB  16,17                               ; adc           %dl,(%rcx)
+  DB  192,45,16,17,192,45,16              ; shrb          $0x10,0x2dc01110(%rip)        # 2dc3041e <_sk_srcover_bgra_8888_sse2_lowp+0x2dc022d6>
+  DB  17,192                              ; adc           %eax,%eax
+  DB  18,120,57                           ; adc           0x39(%rax),%bh
+  DB  64,18,120,57                        ; adc           0x39(%rax),%dil
+  DB  64,18,120,57                        ; adc           0x39(%rax),%dil
+  DB  64,18,120,57                        ; adc           0x39(%rax),%dil
+  DB  64,32,148,90,62,32,148,90           ; and           %dl,0x5a94203e(%rdx,%rbx,2)
+  DB  62,32,148,90,62,32,148,90           ; and           %dl,%ds:0x5a94203e(%rdx,%rbx,2)
+  DB  62,4,157                            ; ds            add $0x9d,%al
+  DB  30                                  ; (bad)
+  DB  62,4,157                            ; ds            add $0x9d,%al
+  DB  30                                  ; (bad)
+  DB  62,4,157                            ; ds            add $0x9d,%al
+  DB  30                                  ; (bad)
+  DB  62,4,157                            ; ds            add $0x9d,%al
+  DB  30                                  ; (bad)
+  DB  62,0,24                             ; add           %bl,%ds:(%rax)
+  DB  161,57,0,24,161,57,0,24,161         ; movabs        0xa1180039a1180039,%eax
+  DB  57,0                                ; cmp           %eax,(%rax)
+  DB  24,161,57,0,0,255                   ; sbb           %ah,-0xffffc7(%rcx)
   DB  255,0                               ; incl          (%rax)
   DB  0,255                               ; add           %bh,%bh
   DB  255,0                               ; incl          (%rax)
@@ -54810,35 +54808,6 @@
   DB  93                                  ; pop           %ebp
   DB  195                                 ; ret
 
-PUBLIC _sk_gauss_a_to_rgba_sse2
-_sk_gauss_a_to_rgba_sse2 LABEL PROC
-  DB  85                                  ; push          %ebp
-  DB  137,229                             ; mov           %esp,%ebp
-  DB  131,236,8                           ; sub           $0x8,%esp
-  DB  232,0,0,0,0                         ; call          698e <_sk_gauss_a_to_rgba_sse2+0xb>
-  DB  88                                  ; pop           %eax
-  DB  139,77,12                           ; mov           0xc(%ebp),%ecx
-  DB  15,40,128,242,121,0,0               ; movaps        0x79f2(%eax),%xmm0
-  DB  15,89,195                           ; mulps         %xmm3,%xmm0
-  DB  15,88,128,2,122,0,0                 ; addps         0x7a02(%eax),%xmm0
-  DB  15,89,195                           ; mulps         %xmm3,%xmm0
-  DB  15,88,128,18,122,0,0                ; addps         0x7a12(%eax),%xmm0
-  DB  15,89,195                           ; mulps         %xmm3,%xmm0
-  DB  15,88,128,34,122,0,0                ; addps         0x7a22(%eax),%xmm0
-  DB  15,89,195                           ; mulps         %xmm3,%xmm0
-  DB  15,88,128,50,122,0,0                ; addps         0x7a32(%eax),%xmm0
-  DB  141,65,4                            ; lea           0x4(%ecx),%eax
-  DB  131,236,8                           ; sub           $0x8,%esp
-  DB  15,40,200                           ; movaps        %xmm0,%xmm1
-  DB  15,40,208                           ; movaps        %xmm0,%xmm2
-  DB  15,40,216                           ; movaps        %xmm0,%xmm3
-  DB  80                                  ; push          %eax
-  DB  255,117,8                           ; pushl         0x8(%ebp)
-  DB  255,17                              ; call          *(%ecx)
-  DB  131,196,24                          ; add           $0x18,%esp
-  DB  93                                  ; pop           %ebp
-  DB  195                                 ; ret
-
 PUBLIC _sk_gradient_sse2
 _sk_gradient_sse2 LABEL PROC
   DB  85                                  ; push          %ebp
@@ -54852,7 +54821,7 @@
   DB  139,10                              ; mov           (%edx),%ecx
   DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
   DB  131,249,2                           ; cmp           $0x2,%ecx
-  DB  114,33                              ; jb            6a15 <_sk_gradient_sse2+0x3a>
+  DB  114,33                              ; jb            69bd <_sk_gradient_sse2+0x3a>
   DB  139,114,36                          ; mov           0x24(%edx),%esi
   DB  73                                  ; dec           %ecx
   DB  131,198,4                           ; add           $0x4,%esi
@@ -54863,7 +54832,7 @@
   DB  102,15,250,202                      ; psubd         %xmm2,%xmm1
   DB  131,198,4                           ; add           $0x4,%esi
   DB  73                                  ; dec           %ecx
-  DB  117,234                             ; jne           69ff <_sk_gradient_sse2+0x24>
+  DB  117,234                             ; jne           69a7 <_sk_gradient_sse2+0x24>
   DB  102,15,126,206                      ; movd          %xmm1,%esi
   DB  102,15,112,209,229                  ; pshufd        $0xe5,%xmm1,%xmm2
   DB  102,15,112,217,78                   ; pshufd        $0x4e,%xmm1,%xmm3
@@ -55012,7 +54981,7 @@
   DB  15,40,218                           ; movaps        %xmm2,%xmm3
   DB  15,40,209                           ; movaps        %xmm1,%xmm2
   DB  15,40,200                           ; movaps        %xmm0,%xmm1
-  DB  232,0,0,0,0                         ; call          6c1d <_sk_xy_to_unit_angle_sse2+0x18>
+  DB  232,0,0,0,0                         ; call          6bc5 <_sk_xy_to_unit_angle_sse2+0x18>
   DB  88                                  ; pop           %eax
   DB  15,87,237                           ; xorps         %xmm5,%xmm5
   DB  15,92,233                           ; subps         %xmm1,%xmm5
@@ -55027,30 +54996,30 @@
   DB  15,94,247                           ; divps         %xmm7,%xmm6
   DB  15,40,254                           ; movaps        %xmm6,%xmm7
   DB  15,89,255                           ; mulps         %xmm7,%xmm7
-  DB  15,40,128,179,119,0,0               ; movaps        0x77b3(%eax),%xmm0
+  DB  15,40,128,187,119,0,0               ; movaps        0x77bb(%eax),%xmm0
   DB  15,89,199                           ; mulps         %xmm7,%xmm0
-  DB  15,88,128,195,119,0,0               ; addps         0x77c3(%eax),%xmm0
+  DB  15,88,128,203,119,0,0               ; addps         0x77cb(%eax),%xmm0
   DB  15,89,199                           ; mulps         %xmm7,%xmm0
-  DB  15,88,128,211,119,0,0               ; addps         0x77d3(%eax),%xmm0
+  DB  15,88,128,219,119,0,0               ; addps         0x77db(%eax),%xmm0
   DB  15,89,199                           ; mulps         %xmm7,%xmm0
-  DB  15,88,128,227,119,0,0               ; addps         0x77e3(%eax),%xmm0
+  DB  15,88,128,235,119,0,0               ; addps         0x77eb(%eax),%xmm0
   DB  15,89,198                           ; mulps         %xmm6,%xmm0
   DB  15,194,236,1                        ; cmpltps       %xmm4,%xmm5
-  DB  15,40,176,243,119,0,0               ; movaps        0x77f3(%eax),%xmm6
+  DB  15,40,176,251,119,0,0               ; movaps        0x77fb(%eax),%xmm6
   DB  15,92,240                           ; subps         %xmm0,%xmm6
   DB  15,84,245                           ; andps         %xmm5,%xmm6
   DB  15,85,232                           ; andnps        %xmm0,%xmm5
   DB  15,87,228                           ; xorps         %xmm4,%xmm4
   DB  15,86,238                           ; orps          %xmm6,%xmm5
   DB  15,194,204,1                        ; cmpltps       %xmm4,%xmm1
-  DB  15,40,128,179,113,0,0               ; movaps        0x71b3(%eax),%xmm0
+  DB  15,40,128,11,114,0,0                ; movaps        0x720b(%eax),%xmm0
   DB  15,92,197                           ; subps         %xmm5,%xmm0
   DB  15,84,193                           ; andps         %xmm1,%xmm0
   DB  15,85,205                           ; andnps        %xmm5,%xmm1
   DB  15,86,200                           ; orps          %xmm0,%xmm1
   DB  15,40,194                           ; movaps        %xmm2,%xmm0
   DB  15,194,196,1                        ; cmpltps       %xmm4,%xmm0
-  DB  15,40,168,195,113,0,0               ; movaps        0x71c3(%eax),%xmm5
+  DB  15,40,168,27,114,0,0                ; movaps        0x721b(%eax),%xmm5
   DB  15,92,233                           ; subps         %xmm1,%xmm5
   DB  15,84,232                           ; andps         %xmm0,%xmm5
   DB  15,85,193                           ; andnps        %xmm1,%xmm0
@@ -55107,15 +55076,15 @@
   DB  15,40,241                           ; movaps        %xmm1,%xmm6
   DB  15,89,246                           ; mulps         %xmm6,%xmm6
   DB  15,88,240                           ; addps         %xmm0,%xmm6
-  DB  232,0,0,0,0                         ; call          6d3a <_sk_xy_to_2pt_conical_quadratic_max_sse2+0x31>
+  DB  232,0,0,0,0                         ; call          6ce2 <_sk_xy_to_2pt_conical_quadratic_max_sse2+0x31>
   DB  90                                  ; pop           %edx
   DB  243,15,89,237                       ; mulss         %xmm5,%xmm5
   DB  15,198,237,0                        ; shufps        $0x0,%xmm5,%xmm5
   DB  15,92,245                           ; subps         %xmm5,%xmm6
   DB  243,15,16,65,64                     ; movss         0x40(%ecx),%xmm0
   DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
-  DB  15,89,162,230,118,0,0               ; mulps         0x76e6(%edx),%xmm4
-  DB  15,89,130,246,118,0,0               ; mulps         0x76f6(%edx),%xmm0
+  DB  15,89,162,238,118,0,0               ; mulps         0x76ee(%edx),%xmm4
+  DB  15,89,130,254,118,0,0               ; mulps         0x76fe(%edx),%xmm0
   DB  15,89,198                           ; mulps         %xmm6,%xmm0
   DB  15,40,236                           ; movaps        %xmm4,%xmm5
   DB  15,89,237                           ; mulps         %xmm5,%xmm5
@@ -55123,11 +55092,11 @@
   DB  15,81,237                           ; sqrtps        %xmm5,%xmm5
   DB  15,40,197                           ; movaps        %xmm5,%xmm0
   DB  15,92,196                           ; subps         %xmm4,%xmm0
-  DB  15,87,162,150,117,0,0               ; xorps         0x7596(%edx),%xmm4
+  DB  15,87,162,238,117,0,0               ; xorps         0x75ee(%edx),%xmm4
   DB  15,92,229                           ; subps         %xmm5,%xmm4
   DB  243,15,16,105,68                    ; movss         0x44(%ecx),%xmm5
   DB  15,198,237,0                        ; shufps        $0x0,%xmm5,%xmm5
-  DB  15,89,170,150,112,0,0               ; mulps         0x7096(%edx),%xmm5
+  DB  15,89,170,238,112,0,0               ; mulps         0x70ee(%edx),%xmm5
   DB  15,89,197                           ; mulps         %xmm5,%xmm0
   DB  15,89,229                           ; mulps         %xmm5,%xmm4
   DB  15,95,196                           ; maxps         %xmm4,%xmm0
@@ -55156,15 +55125,15 @@
   DB  15,40,241                           ; movaps        %xmm1,%xmm6
   DB  15,89,246                           ; mulps         %xmm6,%xmm6
   DB  15,88,240                           ; addps         %xmm0,%xmm6
-  DB  232,0,0,0,0                         ; call          6dd8 <_sk_xy_to_2pt_conical_quadratic_min_sse2+0x31>
+  DB  232,0,0,0,0                         ; call          6d80 <_sk_xy_to_2pt_conical_quadratic_min_sse2+0x31>
   DB  90                                  ; pop           %edx
   DB  243,15,89,237                       ; mulss         %xmm5,%xmm5
   DB  15,198,237,0                        ; shufps        $0x0,%xmm5,%xmm5
   DB  15,92,245                           ; subps         %xmm5,%xmm6
   DB  243,15,16,65,64                     ; movss         0x40(%ecx),%xmm0
   DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
-  DB  15,89,162,72,118,0,0                ; mulps         0x7648(%edx),%xmm4
-  DB  15,89,130,88,118,0,0                ; mulps         0x7658(%edx),%xmm0
+  DB  15,89,162,80,118,0,0                ; mulps         0x7650(%edx),%xmm4
+  DB  15,89,130,96,118,0,0                ; mulps         0x7660(%edx),%xmm0
   DB  15,89,198                           ; mulps         %xmm6,%xmm0
   DB  15,40,236                           ; movaps        %xmm4,%xmm5
   DB  15,89,237                           ; mulps         %xmm5,%xmm5
@@ -55172,11 +55141,11 @@
   DB  15,81,237                           ; sqrtps        %xmm5,%xmm5
   DB  15,40,197                           ; movaps        %xmm5,%xmm0
   DB  15,92,196                           ; subps         %xmm4,%xmm0
-  DB  15,87,162,248,116,0,0               ; xorps         0x74f8(%edx),%xmm4
+  DB  15,87,162,80,117,0,0                ; xorps         0x7550(%edx),%xmm4
   DB  15,92,229                           ; subps         %xmm5,%xmm4
   DB  243,15,16,105,68                    ; movss         0x44(%ecx),%xmm5
   DB  15,198,237,0                        ; shufps        $0x0,%xmm5,%xmm5
-  DB  15,89,170,248,111,0,0               ; mulps         0x6ff8(%edx),%xmm5
+  DB  15,89,170,80,112,0,0                ; mulps         0x7050(%edx),%xmm5
   DB  15,89,197                           ; mulps         %xmm5,%xmm0
   DB  15,89,229                           ; mulps         %xmm5,%xmm4
   DB  15,93,196                           ; minps         %xmm4,%xmm0
@@ -55194,7 +55163,7 @@
   DB  85                                  ; push          %ebp
   DB  137,229                             ; mov           %esp,%ebp
   DB  131,236,8                           ; sub           $0x8,%esp
-  DB  232,0,0,0,0                         ; call          6e50 <_sk_xy_to_2pt_conical_linear_sse2+0xb>
+  DB  232,0,0,0,0                         ; call          6df8 <_sk_xy_to_2pt_conical_linear_sse2+0xb>
   DB  89                                  ; pop           %ecx
   DB  139,69,12                           ; mov           0xc(%ebp),%eax
   DB  139,16                              ; mov           (%eax),%edx
@@ -55203,7 +55172,7 @@
   DB  243,15,89,236                       ; mulss         %xmm4,%xmm5
   DB  15,198,237,0                        ; shufps        $0x0,%xmm5,%xmm5
   DB  15,88,232                           ; addps         %xmm0,%xmm5
-  DB  15,89,169,208,117,0,0               ; mulps         0x75d0(%ecx),%xmm5
+  DB  15,89,169,216,117,0,0               ; mulps         0x75d8(%ecx),%xmm5
   DB  15,89,192                           ; mulps         %xmm0,%xmm0
   DB  15,40,241                           ; movaps        %xmm1,%xmm6
   DB  15,89,246                           ; mulps         %xmm6,%xmm6
@@ -55211,7 +55180,7 @@
   DB  243,15,89,228                       ; mulss         %xmm4,%xmm4
   DB  15,198,228,0                        ; shufps        $0x0,%xmm4,%xmm4
   DB  15,92,196                           ; subps         %xmm4,%xmm0
-  DB  15,87,129,128,116,0,0               ; xorps         0x7480(%ecx),%xmm0
+  DB  15,87,129,216,116,0,0               ; xorps         0x74d8(%ecx),%xmm0
   DB  15,94,197                           ; divps         %xmm5,%xmm0
   DB  141,72,8                            ; lea           0x8(%eax),%ecx
   DB  131,236,8                           ; sub           $0x8,%esp
@@ -55282,16 +55251,16 @@
   DB  131,236,24                          ; sub           $0x18,%esp
   DB  15,41,93,232                        ; movaps        %xmm3,-0x18(%ebp)
   DB  15,40,218                           ; movaps        %xmm2,%xmm3
-  DB  232,0,0,0,0                         ; call          6f39 <_sk_save_xy_sse2+0x12>
+  DB  232,0,0,0,0                         ; call          6ee1 <_sk_save_xy_sse2+0x12>
   DB  88                                  ; pop           %eax
-  DB  15,40,160,151,110,0,0               ; movaps        0x6e97(%eax),%xmm4
+  DB  15,40,160,239,110,0,0               ; movaps        0x6eef(%eax),%xmm4
   DB  15,40,232                           ; movaps        %xmm0,%xmm5
   DB  15,88,236                           ; addps         %xmm4,%xmm5
   DB  243,15,91,245                       ; cvttps2dq     %xmm5,%xmm6
   DB  15,91,246                           ; cvtdq2ps      %xmm6,%xmm6
   DB  15,40,253                           ; movaps        %xmm5,%xmm7
   DB  15,194,254,1                        ; cmpltps       %xmm6,%xmm7
-  DB  15,40,144,167,110,0,0               ; movaps        0x6ea7(%eax),%xmm2
+  DB  15,40,144,255,110,0,0               ; movaps        0x6eff(%eax),%xmm2
   DB  15,84,250                           ; andps         %xmm2,%xmm7
   DB  15,92,247                           ; subps         %xmm7,%xmm6
   DB  15,92,238                           ; subps         %xmm6,%xmm5
@@ -55362,15 +55331,15 @@
   DB  137,229                             ; mov           %esp,%ebp
   DB  86                                  ; push          %esi
   DB  80                                  ; push          %eax
-  DB  232,0,0,0,0                         ; call          7021 <_sk_bilinear_nx_sse2+0xa>
+  DB  232,0,0,0,0                         ; call          6fc9 <_sk_bilinear_nx_sse2+0xa>
   DB  88                                  ; pop           %eax
   DB  139,77,8                            ; mov           0x8(%ebp),%ecx
   DB  139,85,12                           ; mov           0xc(%ebp),%edx
   DB  139,50                              ; mov           (%edx),%esi
   DB  15,16,6                             ; movups        (%esi),%xmm0
   DB  15,16,166,128,0,0,0                 ; movups        0x80(%esi),%xmm4
-  DB  15,88,128,31,116,0,0                ; addps         0x741f(%eax),%xmm0
-  DB  15,40,168,191,109,0,0               ; movaps        0x6dbf(%eax),%xmm5
+  DB  15,88,128,39,116,0,0                ; addps         0x7427(%eax),%xmm0
+  DB  15,40,168,23,110,0,0                ; movaps        0x6e17(%eax),%xmm5
   DB  15,92,236                           ; subps         %xmm4,%xmm5
   DB  15,17,174,0,1,0,0                   ; movups        %xmm5,0x100(%esi)
   DB  141,66,8                            ; lea           0x8(%edx),%eax
@@ -55389,14 +55358,14 @@
   DB  137,229                             ; mov           %esp,%ebp
   DB  86                                  ; push          %esi
   DB  80                                  ; push          %eax
-  DB  232,0,0,0,0                         ; call          7067 <_sk_bilinear_px_sse2+0xa>
+  DB  232,0,0,0,0                         ; call          700f <_sk_bilinear_px_sse2+0xa>
   DB  88                                  ; pop           %eax
   DB  139,77,8                            ; mov           0x8(%ebp),%ecx
   DB  139,85,12                           ; mov           0xc(%ebp),%edx
   DB  139,50                              ; mov           (%edx),%esi
   DB  15,16,6                             ; movups        (%esi),%xmm0
   DB  15,16,166,128,0,0,0                 ; movups        0x80(%esi),%xmm4
-  DB  15,88,128,105,109,0,0               ; addps         0x6d69(%eax),%xmm0
+  DB  15,88,128,193,109,0,0               ; addps         0x6dc1(%eax),%xmm0
   DB  15,17,166,0,1,0,0                   ; movups        %xmm4,0x100(%esi)
   DB  141,66,8                            ; lea           0x8(%edx),%eax
   DB  131,236,8                           ; sub           $0x8,%esp
@@ -55414,15 +55383,15 @@
   DB  137,229                             ; mov           %esp,%ebp
   DB  86                                  ; push          %esi
   DB  80                                  ; push          %eax
-  DB  232,0,0,0,0                         ; call          70a3 <_sk_bilinear_ny_sse2+0xa>
+  DB  232,0,0,0,0                         ; call          704b <_sk_bilinear_ny_sse2+0xa>
   DB  88                                  ; pop           %eax
   DB  139,77,8                            ; mov           0x8(%ebp),%ecx
   DB  139,85,12                           ; mov           0xc(%ebp),%edx
   DB  139,50                              ; mov           (%edx),%esi
   DB  15,16,78,64                         ; movups        0x40(%esi),%xmm1
   DB  15,16,166,192,0,0,0                 ; movups        0xc0(%esi),%xmm4
-  DB  15,88,136,157,115,0,0               ; addps         0x739d(%eax),%xmm1
-  DB  15,40,168,61,109,0,0                ; movaps        0x6d3d(%eax),%xmm5
+  DB  15,88,136,165,115,0,0               ; addps         0x73a5(%eax),%xmm1
+  DB  15,40,168,149,109,0,0               ; movaps        0x6d95(%eax),%xmm5
   DB  15,92,236                           ; subps         %xmm4,%xmm5
   DB  15,17,174,64,1,0,0                  ; movups        %xmm5,0x140(%esi)
   DB  141,66,8                            ; lea           0x8(%edx),%eax
@@ -55441,14 +55410,14 @@
   DB  137,229                             ; mov           %esp,%ebp
   DB  86                                  ; push          %esi
   DB  80                                  ; push          %eax
-  DB  232,0,0,0,0                         ; call          70ea <_sk_bilinear_py_sse2+0xa>
+  DB  232,0,0,0,0                         ; call          7092 <_sk_bilinear_py_sse2+0xa>
   DB  88                                  ; pop           %eax
   DB  139,77,8                            ; mov           0x8(%ebp),%ecx
   DB  139,85,12                           ; mov           0xc(%ebp),%edx
   DB  139,50                              ; mov           (%edx),%esi
   DB  15,16,78,64                         ; movups        0x40(%esi),%xmm1
   DB  15,16,166,192,0,0,0                 ; movups        0xc0(%esi),%xmm4
-  DB  15,88,136,230,108,0,0               ; addps         0x6ce6(%eax),%xmm1
+  DB  15,88,136,62,109,0,0                ; addps         0x6d3e(%eax),%xmm1
   DB  15,17,166,64,1,0,0                  ; movups        %xmm4,0x140(%esi)
   DB  141,66,8                            ; lea           0x8(%edx),%eax
   DB  131,236,8                           ; sub           $0x8,%esp
@@ -55466,20 +55435,20 @@
   DB  137,229                             ; mov           %esp,%ebp
   DB  86                                  ; push          %esi
   DB  80                                  ; push          %eax
-  DB  232,0,0,0,0                         ; call          7127 <_sk_bicubic_n3x_sse2+0xa>
+  DB  232,0,0,0,0                         ; call          70cf <_sk_bicubic_n3x_sse2+0xa>
   DB  88                                  ; pop           %eax
   DB  139,77,8                            ; mov           0x8(%ebp),%ecx
   DB  139,85,12                           ; mov           0xc(%ebp),%edx
   DB  139,50                              ; mov           (%edx),%esi
   DB  15,16,6                             ; movups        (%esi),%xmm0
   DB  15,16,166,128,0,0,0                 ; movups        0x80(%esi),%xmm4
-  DB  15,88,128,41,115,0,0                ; addps         0x7329(%eax),%xmm0
-  DB  15,40,168,185,108,0,0               ; movaps        0x6cb9(%eax),%xmm5
+  DB  15,88,128,49,115,0,0                ; addps         0x7331(%eax),%xmm0
+  DB  15,40,168,17,109,0,0                ; movaps        0x6d11(%eax),%xmm5
   DB  15,92,236                           ; subps         %xmm4,%xmm5
   DB  15,40,229                           ; movaps        %xmm5,%xmm4
   DB  15,89,228                           ; mulps         %xmm4,%xmm4
-  DB  15,89,168,57,115,0,0                ; mulps         0x7339(%eax),%xmm5
-  DB  15,88,168,169,110,0,0               ; addps         0x6ea9(%eax),%xmm5
+  DB  15,89,168,65,115,0,0                ; mulps         0x7341(%eax),%xmm5
+  DB  15,88,168,1,111,0,0                 ; addps         0x6f01(%eax),%xmm5
   DB  15,89,236                           ; mulps         %xmm4,%xmm5
   DB  15,17,174,0,1,0,0                   ; movups        %xmm5,0x100(%esi)
   DB  141,66,8                            ; lea           0x8(%edx),%eax
@@ -55498,23 +55467,23 @@
   DB  137,229                             ; mov           %esp,%ebp
   DB  86                                  ; push          %esi
   DB  80                                  ; push          %eax
-  DB  232,0,0,0,0                         ; call          7184 <_sk_bicubic_n1x_sse2+0xa>
+  DB  232,0,0,0,0                         ; call          712c <_sk_bicubic_n1x_sse2+0xa>
   DB  88                                  ; pop           %eax
   DB  139,77,8                            ; mov           0x8(%ebp),%ecx
   DB  139,85,12                           ; mov           0xc(%ebp),%edx
   DB  139,50                              ; mov           (%edx),%esi
   DB  15,16,6                             ; movups        (%esi),%xmm0
   DB  15,16,166,128,0,0,0                 ; movups        0x80(%esi),%xmm4
-  DB  15,88,128,188,114,0,0               ; addps         0x72bc(%eax),%xmm0
-  DB  15,40,168,92,108,0,0                ; movaps        0x6c5c(%eax),%xmm5
+  DB  15,88,128,196,114,0,0               ; addps         0x72c4(%eax),%xmm0
+  DB  15,40,168,180,108,0,0               ; movaps        0x6cb4(%eax),%xmm5
   DB  15,92,236                           ; subps         %xmm4,%xmm5
-  DB  15,40,160,236,114,0,0               ; movaps        0x72ec(%eax),%xmm4
+  DB  15,40,160,244,114,0,0               ; movaps        0x72f4(%eax),%xmm4
   DB  15,89,229                           ; mulps         %xmm5,%xmm4
-  DB  15,88,160,252,114,0,0               ; addps         0x72fc(%eax),%xmm4
+  DB  15,88,160,4,115,0,0                 ; addps         0x7304(%eax),%xmm4
   DB  15,89,229                           ; mulps         %xmm5,%xmm4
-  DB  15,88,160,76,108,0,0                ; addps         0x6c4c(%eax),%xmm4
+  DB  15,88,160,164,108,0,0               ; addps         0x6ca4(%eax),%xmm4
   DB  15,89,229                           ; mulps         %xmm5,%xmm4
-  DB  15,88,160,12,115,0,0                ; addps         0x730c(%eax),%xmm4
+  DB  15,88,160,20,115,0,0                ; addps         0x7314(%eax),%xmm4
   DB  15,17,166,0,1,0,0                   ; movups        %xmm4,0x100(%esi)
   DB  141,66,8                            ; lea           0x8(%edx),%eax
   DB  131,236,8                           ; sub           $0x8,%esp
@@ -55532,22 +55501,22 @@
   DB  137,229                             ; mov           %esp,%ebp
   DB  86                                  ; push          %esi
   DB  80                                  ; push          %eax
-  DB  232,0,0,0,0                         ; call          71ef <_sk_bicubic_p1x_sse2+0xa>
+  DB  232,0,0,0,0                         ; call          7197 <_sk_bicubic_p1x_sse2+0xa>
   DB  88                                  ; pop           %eax
   DB  139,77,8                            ; mov           0x8(%ebp),%ecx
   DB  139,85,12                           ; mov           0xc(%ebp),%edx
   DB  139,50                              ; mov           (%edx),%esi
-  DB  15,40,160,225,107,0,0               ; movaps        0x6be1(%eax),%xmm4
+  DB  15,40,160,57,108,0,0                ; movaps        0x6c39(%eax),%xmm4
   DB  15,16,6                             ; movups        (%esi),%xmm0
   DB  15,16,174,128,0,0,0                 ; movups        0x80(%esi),%xmm5
   DB  15,88,196                           ; addps         %xmm4,%xmm0
-  DB  15,40,176,129,114,0,0               ; movaps        0x7281(%eax),%xmm6
+  DB  15,40,176,137,114,0,0               ; movaps        0x7289(%eax),%xmm6
   DB  15,89,245                           ; mulps         %xmm5,%xmm6
-  DB  15,88,176,145,114,0,0               ; addps         0x7291(%eax),%xmm6
+  DB  15,88,176,153,114,0,0               ; addps         0x7299(%eax),%xmm6
   DB  15,89,245                           ; mulps         %xmm5,%xmm6
   DB  15,88,244                           ; addps         %xmm4,%xmm6
   DB  15,89,245                           ; mulps         %xmm5,%xmm6
-  DB  15,88,176,161,114,0,0               ; addps         0x72a1(%eax),%xmm6
+  DB  15,88,176,169,114,0,0               ; addps         0x72a9(%eax),%xmm6
   DB  15,17,182,0,1,0,0                   ; movups        %xmm6,0x100(%esi)
   DB  141,66,8                            ; lea           0x8(%edx),%eax
   DB  131,236,8                           ; sub           $0x8,%esp
@@ -55565,18 +55534,18 @@
   DB  137,229                             ; mov           %esp,%ebp
   DB  86                                  ; push          %esi
   DB  80                                  ; push          %eax
-  DB  232,0,0,0,0                         ; call          724f <_sk_bicubic_p3x_sse2+0xa>
+  DB  232,0,0,0,0                         ; call          71f7 <_sk_bicubic_p3x_sse2+0xa>
   DB  88                                  ; pop           %eax
   DB  139,77,8                            ; mov           0x8(%ebp),%ecx
   DB  139,85,12                           ; mov           0xc(%ebp),%edx
   DB  139,50                              ; mov           (%edx),%esi
   DB  15,16,6                             ; movups        (%esi),%xmm0
   DB  15,16,166,128,0,0,0                 ; movups        0x80(%esi),%xmm4
-  DB  15,88,128,49,114,0,0                ; addps         0x7231(%eax),%xmm0
+  DB  15,88,128,57,114,0,0                ; addps         0x7239(%eax),%xmm0
   DB  15,40,236                           ; movaps        %xmm4,%xmm5
   DB  15,89,237                           ; mulps         %xmm5,%xmm5
-  DB  15,89,160,17,114,0,0                ; mulps         0x7211(%eax),%xmm4
-  DB  15,88,160,129,109,0,0               ; addps         0x6d81(%eax),%xmm4
+  DB  15,89,160,25,114,0,0                ; mulps         0x7219(%eax),%xmm4
+  DB  15,88,160,217,109,0,0               ; addps         0x6dd9(%eax),%xmm4
   DB  15,89,229                           ; mulps         %xmm5,%xmm4
   DB  15,17,166,0,1,0,0                   ; movups        %xmm4,0x100(%esi)
   DB  141,66,8                            ; lea           0x8(%edx),%eax
@@ -55595,20 +55564,20 @@
   DB  137,229                             ; mov           %esp,%ebp
   DB  86                                  ; push          %esi
   DB  80                                  ; push          %eax
-  DB  232,0,0,0,0                         ; call          72a2 <_sk_bicubic_n3y_sse2+0xa>
+  DB  232,0,0,0,0                         ; call          724a <_sk_bicubic_n3y_sse2+0xa>
   DB  88                                  ; pop           %eax
   DB  139,77,8                            ; mov           0x8(%ebp),%ecx
   DB  139,85,12                           ; mov           0xc(%ebp),%edx
   DB  139,50                              ; mov           (%edx),%esi
   DB  15,16,78,64                         ; movups        0x40(%esi),%xmm1
   DB  15,16,166,192,0,0,0                 ; movups        0xc0(%esi),%xmm4
-  DB  15,88,136,174,113,0,0               ; addps         0x71ae(%eax),%xmm1
-  DB  15,40,168,62,107,0,0                ; movaps        0x6b3e(%eax),%xmm5
+  DB  15,88,136,182,113,0,0               ; addps         0x71b6(%eax),%xmm1
+  DB  15,40,168,150,107,0,0               ; movaps        0x6b96(%eax),%xmm5
   DB  15,92,236                           ; subps         %xmm4,%xmm5
   DB  15,40,229                           ; movaps        %xmm5,%xmm4
   DB  15,89,228                           ; mulps         %xmm4,%xmm4
-  DB  15,89,168,190,113,0,0               ; mulps         0x71be(%eax),%xmm5
-  DB  15,88,168,46,109,0,0                ; addps         0x6d2e(%eax),%xmm5
+  DB  15,89,168,198,113,0,0               ; mulps         0x71c6(%eax),%xmm5
+  DB  15,88,168,134,109,0,0               ; addps         0x6d86(%eax),%xmm5
   DB  15,89,236                           ; mulps         %xmm4,%xmm5
   DB  15,17,174,64,1,0,0                  ; movups        %xmm5,0x140(%esi)
   DB  141,66,8                            ; lea           0x8(%edx),%eax
@@ -55627,23 +55596,23 @@
   DB  137,229                             ; mov           %esp,%ebp
   DB  86                                  ; push          %esi
   DB  80                                  ; push          %eax
-  DB  232,0,0,0,0                         ; call          7300 <_sk_bicubic_n1y_sse2+0xa>
+  DB  232,0,0,0,0                         ; call          72a8 <_sk_bicubic_n1y_sse2+0xa>
   DB  88                                  ; pop           %eax
   DB  139,77,8                            ; mov           0x8(%ebp),%ecx
   DB  139,85,12                           ; mov           0xc(%ebp),%edx
   DB  139,50                              ; mov           (%edx),%esi
   DB  15,16,78,64                         ; movups        0x40(%esi),%xmm1
   DB  15,16,166,192,0,0,0                 ; movups        0xc0(%esi),%xmm4
-  DB  15,88,136,64,113,0,0                ; addps         0x7140(%eax),%xmm1
-  DB  15,40,168,224,106,0,0               ; movaps        0x6ae0(%eax),%xmm5
+  DB  15,88,136,72,113,0,0                ; addps         0x7148(%eax),%xmm1
+  DB  15,40,168,56,107,0,0                ; movaps        0x6b38(%eax),%xmm5
   DB  15,92,236                           ; subps         %xmm4,%xmm5
-  DB  15,40,160,112,113,0,0               ; movaps        0x7170(%eax),%xmm4
+  DB  15,40,160,120,113,0,0               ; movaps        0x7178(%eax),%xmm4
   DB  15,89,229                           ; mulps         %xmm5,%xmm4
-  DB  15,88,160,128,113,0,0               ; addps         0x7180(%eax),%xmm4
+  DB  15,88,160,136,113,0,0               ; addps         0x7188(%eax),%xmm4
   DB  15,89,229                           ; mulps         %xmm5,%xmm4
-  DB  15,88,160,208,106,0,0               ; addps         0x6ad0(%eax),%xmm4
+  DB  15,88,160,40,107,0,0                ; addps         0x6b28(%eax),%xmm4
   DB  15,89,229                           ; mulps         %xmm5,%xmm4
-  DB  15,88,160,144,113,0,0               ; addps         0x7190(%eax),%xmm4
+  DB  15,88,160,152,113,0,0               ; addps         0x7198(%eax),%xmm4
   DB  15,17,166,64,1,0,0                  ; movups        %xmm4,0x140(%esi)
   DB  141,66,8                            ; lea           0x8(%edx),%eax
   DB  131,236,8                           ; sub           $0x8,%esp
@@ -55661,22 +55630,22 @@
   DB  137,229                             ; mov           %esp,%ebp
   DB  86                                  ; push          %esi
   DB  80                                  ; push          %eax
-  DB  232,0,0,0,0                         ; call          736c <_sk_bicubic_p1y_sse2+0xa>
+  DB  232,0,0,0,0                         ; call          7314 <_sk_bicubic_p1y_sse2+0xa>
   DB  88                                  ; pop           %eax
   DB  139,77,8                            ; mov           0x8(%ebp),%ecx
   DB  139,85,12                           ; mov           0xc(%ebp),%edx
   DB  139,50                              ; mov           (%edx),%esi
-  DB  15,40,160,100,106,0,0               ; movaps        0x6a64(%eax),%xmm4
+  DB  15,40,160,188,106,0,0               ; movaps        0x6abc(%eax),%xmm4
   DB  15,16,78,64                         ; movups        0x40(%esi),%xmm1
   DB  15,16,174,192,0,0,0                 ; movups        0xc0(%esi),%xmm5
   DB  15,88,204                           ; addps         %xmm4,%xmm1
-  DB  15,40,176,4,113,0,0                 ; movaps        0x7104(%eax),%xmm6
+  DB  15,40,176,12,113,0,0                ; movaps        0x710c(%eax),%xmm6
   DB  15,89,245                           ; mulps         %xmm5,%xmm6
-  DB  15,88,176,20,113,0,0                ; addps         0x7114(%eax),%xmm6
+  DB  15,88,176,28,113,0,0                ; addps         0x711c(%eax),%xmm6
   DB  15,89,245                           ; mulps         %xmm5,%xmm6
   DB  15,88,244                           ; addps         %xmm4,%xmm6
   DB  15,89,245                           ; mulps         %xmm5,%xmm6
-  DB  15,88,176,36,113,0,0                ; addps         0x7124(%eax),%xmm6
+  DB  15,88,176,44,113,0,0                ; addps         0x712c(%eax),%xmm6
   DB  15,17,182,64,1,0,0                  ; movups        %xmm6,0x140(%esi)
   DB  141,66,8                            ; lea           0x8(%edx),%eax
   DB  131,236,8                           ; sub           $0x8,%esp
@@ -55694,18 +55663,18 @@
   DB  137,229                             ; mov           %esp,%ebp
   DB  86                                  ; push          %esi
   DB  80                                  ; push          %eax
-  DB  232,0,0,0,0                         ; call          73cd <_sk_bicubic_p3y_sse2+0xa>
+  DB  232,0,0,0,0                         ; call          7375 <_sk_bicubic_p3y_sse2+0xa>
   DB  88                                  ; pop           %eax
   DB  139,77,8                            ; mov           0x8(%ebp),%ecx
   DB  139,85,12                           ; mov           0xc(%ebp),%edx
   DB  139,50                              ; mov           (%edx),%esi
   DB  15,16,78,64                         ; movups        0x40(%esi),%xmm1
   DB  15,16,166,192,0,0,0                 ; movups        0xc0(%esi),%xmm4
-  DB  15,88,136,179,112,0,0               ; addps         0x70b3(%eax),%xmm1
+  DB  15,88,136,187,112,0,0               ; addps         0x70bb(%eax),%xmm1
   DB  15,40,236                           ; movaps        %xmm4,%xmm5
   DB  15,89,237                           ; mulps         %xmm5,%xmm5
-  DB  15,89,160,147,112,0,0               ; mulps         0x7093(%eax),%xmm4
-  DB  15,88,160,3,108,0,0                 ; addps         0x6c03(%eax),%xmm4
+  DB  15,89,160,155,112,0,0               ; mulps         0x709b(%eax),%xmm4
+  DB  15,88,160,91,108,0,0                ; addps         0x6c5b(%eax),%xmm4
   DB  15,89,229                           ; mulps         %xmm5,%xmm4
   DB  15,17,166,64,1,0,0                  ; movups        %xmm4,0x140(%esi)
   DB  141,66,8                            ; lea           0x8(%edx),%eax
@@ -55844,7 +55813,7 @@
   DB  102,15,112,192,232                  ; pshufd        $0xe8,%xmm0,%xmm0
   DB  102,15,98,200                       ; punpckldq     %xmm0,%xmm1
   DB  102,15,127,77,136                   ; movdqa        %xmm1,-0x78(%ebp)
-  DB  232,0,0,0,0                         ; call          75b3 <_sk_clut_3D_sse2+0xee>
+  DB  232,0,0,0,0                         ; call          755b <_sk_clut_3D_sse2+0xee>
   DB  90                                  ; pop           %edx
   DB  243,15,91,196                       ; cvttps2dq     %xmm4,%xmm0
   DB  102,15,127,133,8,255,255,255        ; movdqa        %xmm0,-0xf8(%ebp)
@@ -55852,7 +55821,7 @@
   DB  102,15,254,216                      ; paddd         %xmm0,%xmm3
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,15,254,195                      ; paddd         %xmm3,%xmm0
-  DB  102,15,111,162,253,110,0,0          ; movdqa        0x6efd(%edx),%xmm4
+  DB  102,15,111,162,5,111,0,0            ; movdqa        0x6f05(%edx),%xmm4
   DB  102,15,127,165,120,255,255,255      ; movdqa        %xmm4,-0x88(%ebp)
   DB  102,15,112,200,245                  ; pshufd        $0xf5,%xmm0,%xmm1
   DB  102,15,244,196                      ; pmuludq       %xmm4,%xmm0
@@ -55892,7 +55861,7 @@
   DB  243,15,16,4,129                     ; movss         (%ecx,%eax,4),%xmm0
   DB  15,20,248                           ; unpcklps      %xmm0,%xmm7
   DB  102,15,20,252                       ; unpcklpd      %xmm4,%xmm7
-  DB  102,15,111,130,109,104,0,0          ; movdqa        0x686d(%edx),%xmm0
+  DB  102,15,111,130,197,104,0,0          ; movdqa        0x68c5(%edx),%xmm0
   DB  102,15,127,133,104,255,255,255      ; movdqa        %xmm0,-0x98(%ebp)
   DB  102,15,254,200                      ; paddd         %xmm0,%xmm1
   DB  102,15,112,193,229                  ; pshufd        $0xe5,%xmm1,%xmm0
@@ -55909,7 +55878,7 @@
   DB  243,15,16,4,129                     ; movss         (%ecx,%eax,4),%xmm0
   DB  15,20,240                           ; unpcklps      %xmm0,%xmm6
   DB  102,15,20,241                       ; unpcklpd      %xmm1,%xmm6
-  DB  15,40,130,237,110,0,0               ; movaps        0x6eed(%edx),%xmm0
+  DB  15,40,130,245,110,0,0               ; movaps        0x6ef5(%edx),%xmm0
   DB  15,40,141,248,254,255,255           ; movaps        -0x108(%ebp),%xmm1
   DB  15,88,200                           ; addps         %xmm0,%xmm1
   DB  15,41,77,200                        ; movaps        %xmm1,-0x38(%ebp)
@@ -56527,7 +56496,7 @@
   DB  102,15,112,201,232                  ; pshufd        $0xe8,%xmm1,%xmm1
   DB  102,15,98,217                       ; punpckldq     %xmm1,%xmm3
   DB  102,15,127,93,152                   ; movdqa        %xmm3,-0x68(%ebp)
-  DB  232,0,0,0,0                         ; call          813f <_sk_clut_4D_sse2+0x165>
+  DB  232,0,0,0,0                         ; call          80e7 <_sk_clut_4D_sse2+0x165>
   DB  89                                  ; pop           %ecx
   DB  15,40,197                           ; movaps        %xmm5,%xmm0
   DB  15,41,133,168,254,255,255           ; movaps        %xmm0,-0x158(%ebp)
@@ -56538,7 +56507,7 @@
   DB  102,15,111,212                      ; movdqa        %xmm4,%xmm2
   DB  102,15,254,214                      ; paddd         %xmm6,%xmm2
   DB  102,15,254,218                      ; paddd         %xmm2,%xmm3
-  DB  102,15,111,161,113,99,0,0           ; movdqa        0x6371(%ecx),%xmm4
+  DB  102,15,111,161,121,99,0,0           ; movdqa        0x6379(%ecx),%xmm4
   DB  102,15,112,203,245                  ; pshufd        $0xf5,%xmm3,%xmm1
   DB  102,15,244,220                      ; pmuludq       %xmm4,%xmm3
   DB  102,15,244,204                      ; pmuludq       %xmm4,%xmm1
@@ -56580,7 +56549,7 @@
   DB  15,20,223                           ; unpcklps      %xmm7,%xmm3
   DB  102,15,20,222                       ; unpcklpd      %xmm6,%xmm3
   DB  102,15,41,157,232,254,255,255       ; movapd        %xmm3,-0x118(%ebp)
-  DB  102,15,254,137,225,92,0,0           ; paddd         0x5ce1(%ecx),%xmm1
+  DB  102,15,254,137,57,93,0,0            ; paddd         0x5d39(%ecx),%xmm1
   DB  102,15,112,241,229                  ; pshufd        $0xe5,%xmm1,%xmm6
   DB  102,15,126,240                      ; movd          %xmm6,%eax
   DB  102,15,112,241,78                   ; pshufd        $0x4e,%xmm1,%xmm6
@@ -56595,7 +56564,7 @@
   DB  243,15,16,60,130                    ; movss         (%edx,%eax,4),%xmm7
   DB  15,20,223                           ; unpcklps      %xmm7,%xmm3
   DB  102,15,20,222                       ; unpcklpd      %xmm6,%xmm3
-  DB  15,40,185,97,99,0,0                 ; movaps        0x6361(%ecx),%xmm7
+  DB  15,40,185,105,99,0,0                ; movaps        0x6369(%ecx),%xmm7
   DB  15,88,199                           ; addps         %xmm7,%xmm0
   DB  15,41,133,120,255,255,255           ; movaps        %xmm0,-0x88(%ebp)
   DB  15,40,133,248,254,255,255           ; movaps        -0x108(%ebp),%xmm0
@@ -56616,7 +56585,7 @@
   DB  102,15,127,101,168                  ; movdqa        %xmm4,-0x58(%ebp)
   DB  102,15,254,212                      ; paddd         %xmm4,%xmm2
   DB  102,15,112,194,245                  ; pshufd        $0xf5,%xmm2,%xmm0
-  DB  102,15,111,161,113,99,0,0           ; movdqa        0x6371(%ecx),%xmm4
+  DB  102,15,111,161,121,99,0,0           ; movdqa        0x6379(%ecx),%xmm4
   DB  102,15,244,212                      ; pmuludq       %xmm4,%xmm2
   DB  102,15,244,196                      ; pmuludq       %xmm4,%xmm0
   DB  102,15,112,240,232                  ; pshufd        $0xe8,%xmm0,%xmm6
@@ -56652,7 +56621,7 @@
   DB  243,15,16,44,130                    ; movss         (%edx,%eax,4),%xmm5
   DB  15,20,213                           ; unpcklps      %xmm5,%xmm2
   DB  102,15,20,214                       ; unpcklpd      %xmm6,%xmm2
-  DB  102,15,254,129,225,92,0,0           ; paddd         0x5ce1(%ecx),%xmm0
+  DB  102,15,254,129,57,93,0,0            ; paddd         0x5d39(%ecx),%xmm0
   DB  102,15,112,232,229                  ; pshufd        $0xe5,%xmm0,%xmm5
   DB  102,15,126,232                      ; movd          %xmm5,%eax
   DB  102,15,112,232,78                   ; pshufd        $0x4e,%xmm0,%xmm5
@@ -56736,7 +56705,7 @@
   DB  243,15,16,44,130                    ; movss         (%edx,%eax,4),%xmm5
   DB  15,20,245                           ; unpcklps      %xmm5,%xmm6
   DB  102,15,20,240                       ; unpcklpd      %xmm0,%xmm6
-  DB  102,15,254,137,225,92,0,0           ; paddd         0x5ce1(%ecx),%xmm1
+  DB  102,15,254,137,57,93,0,0            ; paddd         0x5d39(%ecx),%xmm1
   DB  102,15,112,193,229                  ; pshufd        $0xe5,%xmm1,%xmm0
   DB  102,15,126,192                      ; movd          %xmm0,%eax
   DB  102,15,112,193,78                   ; pshufd        $0x4e,%xmm1,%xmm0
@@ -56754,7 +56723,7 @@
   DB  102,15,111,202                      ; movdqa        %xmm2,%xmm1
   DB  102,15,254,77,168                   ; paddd         -0x58(%ebp),%xmm1
   DB  102,15,112,193,245                  ; pshufd        $0xf5,%xmm1,%xmm0
-  DB  102,15,111,145,113,99,0,0           ; movdqa        0x6371(%ecx),%xmm2
+  DB  102,15,111,145,121,99,0,0           ; movdqa        0x6379(%ecx),%xmm2
   DB  102,15,244,202                      ; pmuludq       %xmm2,%xmm1
   DB  102,15,244,194                      ; pmuludq       %xmm2,%xmm0
   DB  102,15,112,192,232                  ; pshufd        $0xe8,%xmm0,%xmm0
@@ -56790,7 +56759,7 @@
   DB  243,15,16,4,130                     ; movss         (%edx,%eax,4),%xmm0
   DB  15,20,216                           ; unpcklps      %xmm0,%xmm3
   DB  102,15,20,217                       ; unpcklpd      %xmm1,%xmm3
-  DB  102,15,254,169,225,92,0,0           ; paddd         0x5ce1(%ecx),%xmm5
+  DB  102,15,254,169,57,93,0,0            ; paddd         0x5d39(%ecx),%xmm5
   DB  102,15,112,197,229                  ; pshufd        $0xe5,%xmm5,%xmm0
   DB  102,15,126,192                      ; movd          %xmm0,%eax
   DB  102,15,112,197,78                   ; pshufd        $0x4e,%xmm5,%xmm0
@@ -56850,7 +56819,7 @@
   DB  102,15,111,209                      ; movdqa        %xmm1,%xmm2
   DB  102,15,254,85,152                   ; paddd         -0x68(%ebp),%xmm2
   DB  102,15,112,194,245                  ; pshufd        $0xf5,%xmm2,%xmm0
-  DB  102,15,111,153,113,99,0,0           ; movdqa        0x6371(%ecx),%xmm3
+  DB  102,15,111,153,121,99,0,0           ; movdqa        0x6379(%ecx),%xmm3
   DB  102,15,244,211                      ; pmuludq       %xmm3,%xmm2
   DB  102,15,244,195                      ; pmuludq       %xmm3,%xmm0
   DB  102,15,111,251                      ; movdqa        %xmm3,%xmm7
@@ -56889,7 +56858,7 @@
   DB  243,15,16,44,130                    ; movss         (%edx,%eax,4),%xmm5
   DB  15,20,229                           ; unpcklps      %xmm5,%xmm4
   DB  102,15,20,227                       ; unpcklpd      %xmm3,%xmm4
-  DB  102,15,254,129,225,92,0,0           ; paddd         0x5ce1(%ecx),%xmm0
+  DB  102,15,254,129,57,93,0,0            ; paddd         0x5d39(%ecx),%xmm0
   DB  102,15,112,216,229                  ; pshufd        $0xe5,%xmm0,%xmm3
   DB  102,15,126,216                      ; movd          %xmm3,%eax
   DB  102,15,112,216,78                   ; pshufd        $0x4e,%xmm0,%xmm3
@@ -56941,7 +56910,7 @@
   DB  243,15,16,52,130                    ; movss         (%edx,%eax,4),%xmm6
   DB  15,20,238                           ; unpcklps      %xmm6,%xmm5
   DB  102,15,20,235                       ; unpcklpd      %xmm3,%xmm5
-  DB  102,15,111,137,225,92,0,0           ; movdqa        0x5ce1(%ecx),%xmm1
+  DB  102,15,111,137,57,93,0,0            ; movdqa        0x5d39(%ecx),%xmm1
   DB  102,15,254,249                      ; paddd         %xmm1,%xmm7
   DB  102,15,112,223,229                  ; pshufd        $0xe5,%xmm7,%xmm3
   DB  102,15,126,216                      ; movd          %xmm3,%eax
@@ -56975,7 +56944,7 @@
   DB  102,15,111,226                      ; movdqa        %xmm2,%xmm4
   DB  102,15,254,69,152                   ; paddd         -0x68(%ebp),%xmm0
   DB  102,15,112,208,245                  ; pshufd        $0xf5,%xmm0,%xmm2
-  DB  102,15,111,153,113,99,0,0           ; movdqa        0x6371(%ecx),%xmm3
+  DB  102,15,111,153,121,99,0,0           ; movdqa        0x6379(%ecx),%xmm3
   DB  102,15,244,195                      ; pmuludq       %xmm3,%xmm0
   DB  102,15,244,211                      ; pmuludq       %xmm3,%xmm2
   DB  102,15,112,218,232                  ; pshufd        $0xe8,%xmm2,%xmm3
@@ -57032,7 +57001,7 @@
   DB  102,15,111,212                      ; movdqa        %xmm4,%xmm2
   DB  102,15,254,85,168                   ; paddd         -0x58(%ebp),%xmm2
   DB  102,15,112,194,245                  ; pshufd        $0xf5,%xmm2,%xmm0
-  DB  102,15,111,137,113,99,0,0           ; movdqa        0x6371(%ecx),%xmm1
+  DB  102,15,111,137,121,99,0,0           ; movdqa        0x6379(%ecx),%xmm1
   DB  102,15,244,209                      ; pmuludq       %xmm1,%xmm2
   DB  102,15,244,193                      ; pmuludq       %xmm1,%xmm0
   DB  102,15,112,240,232                  ; pshufd        $0xe8,%xmm0,%xmm6
@@ -57069,7 +57038,7 @@
   DB  243,15,16,36,130                    ; movss         (%edx,%eax,4),%xmm4
   DB  15,20,212                           ; unpcklps      %xmm4,%xmm2
   DB  102,15,20,214                       ; unpcklpd      %xmm6,%xmm2
-  DB  102,15,254,129,225,92,0,0           ; paddd         0x5ce1(%ecx),%xmm0
+  DB  102,15,254,129,57,93,0,0            ; paddd         0x5d39(%ecx),%xmm0
   DB  102,15,112,224,229                  ; pshufd        $0xe5,%xmm0,%xmm4
   DB  102,15,126,224                      ; movd          %xmm4,%eax
   DB  102,15,112,224,78                   ; pshufd        $0x4e,%xmm0,%xmm4
@@ -57137,7 +57106,7 @@
   DB  102,15,111,93,152                   ; movdqa        -0x68(%ebp),%xmm3
   DB  102,15,254,217                      ; paddd         %xmm1,%xmm3
   DB  102,15,112,211,245                  ; pshufd        $0xf5,%xmm3,%xmm2
-  DB  102,15,111,129,113,99,0,0           ; movdqa        0x6371(%ecx),%xmm0
+  DB  102,15,111,129,121,99,0,0           ; movdqa        0x6379(%ecx),%xmm0
   DB  102,15,244,216                      ; pmuludq       %xmm0,%xmm3
   DB  102,15,244,208                      ; pmuludq       %xmm0,%xmm2
   DB  102,15,111,248                      ; movdqa        %xmm0,%xmm7
@@ -57176,7 +57145,7 @@
   DB  243,15,16,28,130                    ; movss         (%edx,%eax,4),%xmm3
   DB  15,20,235                           ; unpcklps      %xmm3,%xmm5
   DB  102,15,20,238                       ; unpcklpd      %xmm6,%xmm5
-  DB  102,15,254,145,225,92,0,0           ; paddd         0x5ce1(%ecx),%xmm2
+  DB  102,15,254,145,57,93,0,0            ; paddd         0x5d39(%ecx),%xmm2
   DB  102,15,112,218,229                  ; pshufd        $0xe5,%xmm2,%xmm3
   DB  102,15,126,216                      ; movd          %xmm3,%eax
   DB  102,15,112,218,78                   ; pshufd        $0x4e,%xmm2,%xmm3
@@ -57228,7 +57197,7 @@
   DB  243,15,16,4,130                     ; movss         (%edx,%eax,4),%xmm0
   DB  15,20,208                           ; unpcklps      %xmm0,%xmm2
   DB  102,15,20,215                       ; unpcklpd      %xmm7,%xmm2
-  DB  102,15,111,137,225,92,0,0           ; movdqa        0x5ce1(%ecx),%xmm1
+  DB  102,15,111,137,57,93,0,0            ; movdqa        0x5d39(%ecx),%xmm1
   DB  102,15,254,217                      ; paddd         %xmm1,%xmm3
   DB  102,15,112,195,229                  ; pshufd        $0xe5,%xmm3,%xmm0
   DB  102,15,126,192                      ; movd          %xmm0,%eax
@@ -57263,7 +57232,7 @@
   DB  102,15,111,194                      ; movdqa        %xmm2,%xmm0
   DB  102,15,254,69,152                   ; paddd         -0x68(%ebp),%xmm0
   DB  102,15,112,224,245                  ; pshufd        $0xf5,%xmm0,%xmm4
-  DB  102,15,111,169,113,99,0,0           ; movdqa        0x6371(%ecx),%xmm5
+  DB  102,15,111,169,121,99,0,0           ; movdqa        0x6379(%ecx),%xmm5
   DB  102,15,111,221                      ; movdqa        %xmm5,%xmm3
   DB  102,15,244,195                      ; pmuludq       %xmm3,%xmm0
   DB  102,15,244,227                      ; pmuludq       %xmm3,%xmm4
@@ -57319,7 +57288,7 @@
   DB  102,15,111,202                      ; movdqa        %xmm2,%xmm1
   DB  102,15,254,77,168                   ; paddd         -0x58(%ebp),%xmm1
   DB  102,15,112,193,245                  ; pshufd        $0xf5,%xmm1,%xmm0
-  DB  102,15,111,145,113,99,0,0           ; movdqa        0x6371(%ecx),%xmm2
+  DB  102,15,111,145,121,99,0,0           ; movdqa        0x6379(%ecx),%xmm2
   DB  102,15,244,202                      ; pmuludq       %xmm2,%xmm1
   DB  102,15,244,194                      ; pmuludq       %xmm2,%xmm0
   DB  102,15,112,192,232                  ; pshufd        $0xe8,%xmm0,%xmm0
@@ -57355,7 +57324,7 @@
   DB  243,15,16,4,130                     ; movss         (%edx,%eax,4),%xmm0
   DB  15,20,216                           ; unpcklps      %xmm0,%xmm3
   DB  102,15,20,217                       ; unpcklpd      %xmm1,%xmm3
-  DB  102,15,254,185,225,92,0,0           ; paddd         0x5ce1(%ecx),%xmm7
+  DB  102,15,254,185,57,93,0,0            ; paddd         0x5d39(%ecx),%xmm7
   DB  102,15,112,199,229                  ; pshufd        $0xe5,%xmm7,%xmm0
   DB  102,15,126,192                      ; movd          %xmm0,%eax
   DB  102,15,112,199,78                   ; pshufd        $0x4e,%xmm7,%xmm0
@@ -57405,7 +57374,7 @@
   DB  102,15,111,233                      ; movdqa        %xmm1,%xmm5
   DB  102,15,254,69,152                   ; paddd         -0x68(%ebp),%xmm0
   DB  102,15,112,200,245                  ; pshufd        $0xf5,%xmm0,%xmm1
-  DB  102,15,111,145,113,99,0,0           ; movdqa        0x6371(%ecx),%xmm2
+  DB  102,15,111,145,121,99,0,0           ; movdqa        0x6379(%ecx),%xmm2
   DB  102,15,244,194                      ; pmuludq       %xmm2,%xmm0
   DB  102,15,244,202                      ; pmuludq       %xmm2,%xmm1
   DB  102,15,111,250                      ; movdqa        %xmm2,%xmm7
@@ -57444,7 +57413,7 @@
   DB  243,15,16,36,130                    ; movss         (%edx,%eax,4),%xmm4
   DB  15,20,204                           ; unpcklps      %xmm4,%xmm1
   DB  102,15,20,203                       ; unpcklpd      %xmm3,%xmm1
-  DB  102,15,254,145,225,92,0,0           ; paddd         0x5ce1(%ecx),%xmm2
+  DB  102,15,254,145,57,93,0,0            ; paddd         0x5d39(%ecx),%xmm2
   DB  102,15,112,218,229                  ; pshufd        $0xe5,%xmm2,%xmm3
   DB  102,15,126,216                      ; movd          %xmm3,%eax
   DB  102,15,112,218,78                   ; pshufd        $0x4e,%xmm2,%xmm3
@@ -57498,7 +57467,7 @@
   DB  243,15,16,36,130                    ; movss         (%edx,%eax,4),%xmm4
   DB  15,20,252                           ; unpcklps      %xmm4,%xmm7
   DB  102,15,20,254                       ; unpcklpd      %xmm6,%xmm7
-  DB  102,15,254,153,225,92,0,0           ; paddd         0x5ce1(%ecx),%xmm3
+  DB  102,15,254,153,57,93,0,0            ; paddd         0x5d39(%ecx),%xmm3
   DB  102,15,112,227,229                  ; pshufd        $0xe5,%xmm3,%xmm4
   DB  102,15,126,224                      ; movd          %xmm4,%eax
   DB  102,15,112,227,78                   ; pshufd        $0x4e,%xmm3,%xmm4
@@ -57531,7 +57500,7 @@
   DB  102,15,254,208                      ; paddd         %xmm0,%xmm2
   DB  102,15,111,216                      ; movdqa        %xmm0,%xmm3
   DB  102,15,112,194,245                  ; pshufd        $0xf5,%xmm2,%xmm0
-  DB  102,15,111,137,113,99,0,0           ; movdqa        0x6371(%ecx),%xmm1
+  DB  102,15,111,137,121,99,0,0           ; movdqa        0x6379(%ecx),%xmm1
   DB  102,15,244,209                      ; pmuludq       %xmm1,%xmm2
   DB  102,15,244,193                      ; pmuludq       %xmm1,%xmm0
   DB  102,15,111,241                      ; movdqa        %xmm1,%xmm6
@@ -57571,7 +57540,7 @@
   DB  15,20,225                           ; unpcklps      %xmm1,%xmm4
   DB  102,15,20,226                       ; unpcklpd      %xmm2,%xmm4
   DB  102,15,41,165,88,255,255,255        ; movapd        %xmm4,-0xa8(%ebp)
-  DB  102,15,254,129,225,92,0,0           ; paddd         0x5ce1(%ecx),%xmm0
+  DB  102,15,254,129,57,93,0,0            ; paddd         0x5d39(%ecx),%xmm0
   DB  102,15,112,200,229                  ; pshufd        $0xe5,%xmm0,%xmm1
   DB  102,15,126,200                      ; movd          %xmm1,%eax
   DB  102,15,112,200,78                   ; pshufd        $0x4e,%xmm0,%xmm1
@@ -57625,7 +57594,7 @@
   DB  243,15,16,52,130                    ; movss         (%edx,%eax,4),%xmm6
   DB  15,20,206                           ; unpcklps      %xmm6,%xmm1
   DB  102,15,20,203                       ; unpcklpd      %xmm3,%xmm1
-  DB  102,15,254,145,225,92,0,0           ; paddd         0x5ce1(%ecx),%xmm2
+  DB  102,15,254,145,57,93,0,0            ; paddd         0x5d39(%ecx),%xmm2
   DB  102,15,112,218,229                  ; pshufd        $0xe5,%xmm2,%xmm3
   DB  102,15,126,216                      ; movd          %xmm3,%eax
   DB  102,15,112,218,78                   ; pshufd        $0x4e,%xmm2,%xmm3
@@ -57694,7 +57663,7 @@
   DB  139,85,12                           ; mov           0xc(%ebp),%edx
   DB  141,66,8                            ; lea           0x8(%edx),%eax
   DB  131,236,8                           ; sub           $0x8,%esp
-  DB  15,40,153,161,92,0,0                ; movaps        0x5ca1(%ecx),%xmm3
+  DB  15,40,153,249,92,0,0                ; movaps        0x5cf9(%ecx),%xmm3
   DB  80                                  ; push          %eax
   DB  255,117,8                           ; pushl         0x8(%ebp)
   DB  255,82,4                            ; call          *0x4(%edx)
@@ -57704,6 +57673,35 @@
   DB  91                                  ; pop           %ebx
   DB  93                                  ; pop           %ebp
   DB  195                                 ; ret
+
+PUBLIC _sk_gauss_a_to_rgba_sse2
+_sk_gauss_a_to_rgba_sse2 LABEL PROC
+  DB  85                                  ; push          %ebp
+  DB  137,229                             ; mov           %esp,%ebp
+  DB  131,236,8                           ; sub           $0x8,%esp
+  DB  232,0,0,0,0                         ; call          953c <_sk_gauss_a_to_rgba_sse2+0xb>
+  DB  88                                  ; pop           %eax
+  DB  139,77,12                           ; mov           0xc(%ebp),%ecx
+  DB  15,40,128,52,79,0,0                 ; movaps        0x4f34(%eax),%xmm0
+  DB  15,89,195                           ; mulps         %xmm3,%xmm0
+  DB  15,88,128,68,79,0,0                 ; addps         0x4f44(%eax),%xmm0
+  DB  15,89,195                           ; mulps         %xmm3,%xmm0
+  DB  15,88,128,84,79,0,0                 ; addps         0x4f54(%eax),%xmm0
+  DB  15,89,195                           ; mulps         %xmm3,%xmm0
+  DB  15,88,128,100,79,0,0                ; addps         0x4f64(%eax),%xmm0
+  DB  15,89,195                           ; mulps         %xmm3,%xmm0
+  DB  15,88,128,116,79,0,0                ; addps         0x4f74(%eax),%xmm0
+  DB  141,65,4                            ; lea           0x4(%ecx),%eax
+  DB  131,236,8                           ; sub           $0x8,%esp
+  DB  15,40,200                           ; movaps        %xmm0,%xmm1
+  DB  15,40,208                           ; movaps        %xmm0,%xmm2
+  DB  15,40,216                           ; movaps        %xmm0,%xmm3
+  DB  80                                  ; push          %eax
+  DB  255,117,8                           ; pushl         0x8(%ebp)
+  DB  255,17                              ; call          *(%ecx)
+  DB  131,196,24                          ; add           $0x18,%esp
+  DB  93                                  ; pop           %ebp
+  DB  195                                 ; ret
   DB  144                                 ; nop
   DB  144                                 ; nop
   DB  144                                 ; nop
@@ -63645,47 +63643,20 @@
   DB  0,0                                 ; add           %al,(%eax)
   DB  211,128,0,0,211,128                 ; roll          %cl,-0x7f2d0000(%eax)
   DB  0,0                                 ; add           %al,(%eax)
-  DB  211,128,0,0,211,45                  ; roll          %cl,0x2dd30000(%eax)
-  DB  16,17                               ; adc           %dl,(%ecx)
-  DB  192,45,16,17,192,45,16              ; shrb          $0x10,0x2dc01110
-  DB  17,192                              ; adc           %eax,%eax
-  DB  45,16,17,192,18                     ; sub           $0x12c01110,%eax
-  DB  120,57                              ; js            e3cc <.literal16+0x5fc>
-  DB  64                                  ; inc           %eax
-  DB  18,120,57                           ; adc           0x39(%eax),%bh
-  DB  64                                  ; inc           %eax
-  DB  18,120,57                           ; adc           0x39(%eax),%bh
-  DB  64                                  ; inc           %eax
-  DB  18,120,57                           ; adc           0x39(%eax),%bh
-  DB  64                                  ; inc           %eax
-  DB  32,148,90,62,32,148,90              ; and           %dl,0x5a94203e(%edx,%ebx,2)
-  DB  62,32,148,90,62,32,148,90           ; and           %dl,%ds:0x5a94203e(%edx,%ebx,2)
-  DB  62,4,157                            ; ds            add $0x9d,%al
-  DB  30                                  ; push          %ds
-  DB  62,4,157                            ; ds            add $0x9d,%al
-  DB  30                                  ; push          %ds
-  DB  62,4,157                            ; ds            add $0x9d,%al
-  DB  30                                  ; push          %ds
-  DB  62,4,157                            ; ds            add $0x9d,%al
-  DB  30                                  ; push          %ds
-  DB  62,0,24                             ; add           %bl,%ds:(%eax)
-  DB  161,57,0,24,161                     ; mov           0xa1180039,%eax
-  DB  57,0                                ; cmp           %eax,(%eax)
-  DB  24,161,57,0,24,161                  ; sbb           %ah,-0x5ee7ffc7(%ecx)
-  DB  57,111,43                           ; cmp           %ebp,0x2b(%edi)
-  DB  231,187                             ; out           %eax,$0xbb
-  DB  111                                 ; outsl         %ds:(%esi),(%dx)
+  DB  211,128,0,0,211,111                 ; roll          %cl,0x6fd30000(%eax)
   DB  43,231                              ; sub           %edi,%esp
   DB  187,111,43,231,187                  ; mov           $0xbbe72b6f,%ebx
   DB  111                                 ; outsl         %ds:(%esi),(%dx)
   DB  43,231                              ; sub           %edi,%esp
-  DB  187,159,215,202,60                  ; mov           $0x3ccad79f,%ebx
+  DB  187,111,43,231,187                  ; mov           $0xbbe72b6f,%ebx
   DB  159                                 ; lahf
   DB  215                                 ; xlat          %ds:(%ebx)
   DB  202,60,159                          ; lret          $0x9f3c
   DB  215                                 ; xlat          %ds:(%ebx)
   DB  202,60,159                          ; lret          $0x9f3c
   DB  215                                 ; xlat          %ds:(%ebx)
+  DB  202,60,159                          ; lret          $0x9f3c
+  DB  215                                 ; xlat          %ds:(%ebx)
   DB  202,60,212                          ; lret          $0xd43c
   DB  100,84                              ; fs            push %esp
   DB  189,212,100,84,189                  ; mov           $0xbd5464d4,%ebp
@@ -63721,13 +63692,13 @@
   DB  192,191,0,0,192,191,0               ; sarb          $0x0,-0x40400000(%edi)
   DB  0,192                               ; add           %al,%al
   DB  191,0,0,192,191                     ; mov           $0xbfc00000,%edi
-  DB  114,28                              ; jb            e47e <.literal16+0x6ae>
+  DB  114,28                              ; jb            e42e <.literal16+0x65e>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         e482 <.literal16+0x6b2>
+  DB  62,114,28                           ; jb,pt         e432 <.literal16+0x662>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         e486 <.literal16+0x6b6>
+  DB  62,114,28                           ; jb,pt         e436 <.literal16+0x666>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         e48a <.literal16+0x6ba>
+  DB  62,114,28                           ; jb,pt         e43a <.literal16+0x66a>
   DB  199                                 ; (bad)
   DB  62,85                               ; ds            push %ebp
   DB  85                                  ; push          %ebp
@@ -63748,13 +63719,13 @@
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%esi)
   DB  99,61,57,142,99,61                  ; arpl          %di,0x3d638e39
   DB  57,142,99,61,114,249                ; cmp           %ecx,-0x68dc29d(%esi)
-  DB  127,63                              ; jg            e4e3 <.literal16+0x713>
-  DB  114,249                             ; jb            e49f <.literal16+0x6cf>
-  DB  127,63                              ; jg            e4e7 <.literal16+0x717>
-  DB  114,249                             ; jb            e4a3 <.literal16+0x6d3>
-  DB  127,63                              ; jg            e4eb <.literal16+0x71b>
-  DB  114,249                             ; jb            e4a7 <.literal16+0x6d7>
-  DB  127,63                              ; jg            e4ef <.literal16+0x71f>
+  DB  127,63                              ; jg            e493 <.literal16+0x6c3>
+  DB  114,249                             ; jb            e44f <.literal16+0x67f>
+  DB  127,63                              ; jg            e497 <.literal16+0x6c7>
+  DB  114,249                             ; jb            e453 <.literal16+0x683>
+  DB  127,63                              ; jg            e49b <.literal16+0x6cb>
+  DB  114,249                             ; jb            e457 <.literal16+0x687>
+  DB  127,63                              ; jg            e49f <.literal16+0x6cf>
   DB  3,0                                 ; add           (%eax),%eax
   DB  0,0                                 ; add           %al,(%eax)
   DB  3,0                                 ; add           (%eax),%eax
@@ -63763,25 +63734,53 @@
   DB  0,0                                 ; add           %al,(%eax)
   DB  3,0                                 ; add           (%eax),%eax
   DB  0,0                                 ; add           %al,(%eax)
-  DB  255,0                               ; incl          (%eax)
-  DB  255,0                               ; incl          (%eax)
-  DB  255,0                               ; incl          (%eax)
-  DB  255,0                               ; incl          (%eax)
-  DB  255,0                               ; incl          (%eax)
-  DB  255,0                               ; incl          (%eax)
-  DB  255,0                               ; incl          (%eax)
-  DB  255,0                               ; incl          (%eax)
-  DB  254,0                               ; incb          (%eax)
-  DB  254,0                               ; incb          (%eax)
-  DB  254,0                               ; incb          (%eax)
-  DB  254,0                               ; incb          (%eax)
-  DB  254,0                               ; incb          (%eax)
-  DB  254,0                               ; incb          (%eax)
-  DB  254,0                               ; incb          (%eax)
-  DB  254,0                               ; incb          (%eax)
+  DB  45,16,17,192,45                     ; sub           $0x2dc01110,%eax
+  DB  16,17                               ; adc           %dl,(%ecx)
+  DB  192,45,16,17,192,45,16              ; shrb          $0x10,0x2dc01110
+  DB  17,192                              ; adc           %eax,%eax
+  DB  18,120,57                           ; adc           0x39(%eax),%bh
+  DB  64                                  ; inc           %eax
+  DB  18,120,57                           ; adc           0x39(%eax),%bh
+  DB  64                                  ; inc           %eax
+  DB  18,120,57                           ; adc           0x39(%eax),%bh
+  DB  64                                  ; inc           %eax
+  DB  18,120,57                           ; adc           0x39(%eax),%bh
+  DB  64                                  ; inc           %eax
+  DB  32,148,90,62,32,148,90              ; and           %dl,0x5a94203e(%edx,%ebx,2)
+  DB  62,32,148,90,62,32,148,90           ; and           %dl,%ds:0x5a94203e(%edx,%ebx,2)
+  DB  62,4,157                            ; ds            add $0x9d,%al
+  DB  30                                  ; push          %ds
+  DB  62,4,157                            ; ds            add $0x9d,%al
+  DB  30                                  ; push          %ds
+  DB  62,4,157                            ; ds            add $0x9d,%al
+  DB  30                                  ; push          %ds
+  DB  62,4,157                            ; ds            add $0x9d,%al
+  DB  30                                  ; push          %ds
+  DB  62,0,24                             ; add           %bl,%ds:(%eax)
+  DB  161,57,0,24,161                     ; mov           0xa1180039,%eax
+  DB  57,0                                ; cmp           %eax,(%eax)
+  DB  24,161,57,0,24,161                  ; sbb           %ah,-0x5ee7ffc7(%ecx)
+  DB  57,255                              ; cmp           %edi,%edi
+  DB  0,255                               ; add           %bh,%bh
+  DB  0,255                               ; add           %bh,%bh
+  DB  0,255                               ; add           %bh,%bh
+  DB  0,255                               ; add           %bh,%bh
+  DB  0,255                               ; add           %bh,%bh
+  DB  0,255                               ; add           %bh,%bh
+  DB  0,255                               ; add           %bh,%bh
+  DB  0,254                               ; add           %bh,%dh
+  DB  0,254                               ; add           %bh,%dh
+  DB  0,254                               ; add           %bh,%dh
+  DB  0,254                               ; add           %bh,%dh
+  DB  0,254                               ; add           %bh,%dh
+  DB  0,254                               ; add           %bh,%dh
+  DB  0,254                               ; add           %bh,%dh
+  DB  0,254                               ; add           %bh,%dh
+  DB  0,0                                 ; add           %al,(%eax)
+  DB  128,0,128                           ; addb          $0x80,(%eax)
   DB  0,128,0,128,0,128                   ; add           %al,-0x7fff8000(%eax)
   DB  0,128,0,128,0,128                   ; add           %al,-0x7fff8000(%eax)
-  DB  0,128,0,128,254,1                   ; add           %al,0x1fe8000(%eax)
+  DB  254,1                               ; incb          (%ecx)
   DB  254,1                               ; incb          (%ecx)
   DB  254,1                               ; incb          (%ecx)
   DB  254,1                               ; incb          (%ecx)
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index 9224f2a..7c769fd 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -1198,23 +1198,6 @@
     gradient_lookup(c, idx, t, &r, &g, &b, &a);
 }
 
-// TODO: move this somewhere not in the middle of the gradients...
-STAGE(gauss_a_to_rgba, Ctx::None) {
-    // x = 1 - x;
-    // exp(-x * x * 4) - 0.018f;
-    // ... now approximate with quartic
-    //
-    const float c4 = -2.26661229133605957031f;
-    const float c3 = 2.89795351028442382812f;
-    const float c2 = 0.21345567703247070312f;
-    const float c1 = 0.15489584207534790039f;
-    const float c0 = 0.00030726194381713867f;
-    a = mad(a, mad(a, mad(a, mad(a, c4, c3), c2), c1), c0);
-    r = a;
-    g = a;
-    b = a;
-}
-
 STAGE(gradient, const SkJumper_GradientCtx* c) {
     auto t = r;
     U32 idx = 0;
@@ -1503,3 +1486,19 @@
     // "a" was really CMYK's K, so we just set alpha opaque.
     a = 1.0f;
 }
+
+STAGE(gauss_a_to_rgba, Ctx::None) {
+    // Target curve, with x taken from the incoming a:  x = 1 - x;  exp(-x * x * 4) - 0.018f;
+    // Rather than evaluate exp(), that curve is approximated by the quartic c4..c0 below.
+    // The nested mad() calls look like a Horner chain, i.e. mad(f,m,a) = f*m+a (TODO confirm).
+    // The value computed into a is then copied to r, g and b.
+    const float c4 = -2.26661229133605957031f;
+    const float c3 = 2.89795351028442382812f;
+    const float c2 = 0.21345567703247070312f;
+    const float c1 = 0.15489584207534790039f;
+    const float c0 = 0.00030726194381713867f;
+    a = mad(a, mad(a, mad(a, mad(a, c4, c3), c2), c1), c0);
+    r = a;
+    g = a;
+    b = a;
+}