Tack on ret instructions to each SkSplicer stage.

This is just a warm-up refactor to make the existing stages more
amenable to running in place without splicing.  Hopefully the driver
interpreter loop for that will be my next CL.

Change-Id: Ie76868b59af8ff3a10e2cd64f03c3f8d8a5031dd
Reviewed-on: https://skia-review.googlesource.com/8142
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
diff --git a/src/splicer/SkSplicer.cpp b/src/splicer/SkSplicer.cpp
index a25a578..6a0fc3c 100644
--- a/src/splicer/SkSplicer.cpp
+++ b/src/splicer/SkSplicer.cpp
@@ -49,6 +49,13 @@
         buf->write(&val, sizeof(val));
     }
 
+    // Write every element of code to buf except the last one, which is the stage's final return.
+    template <typename T, size_t N>
+    static void splice_until_ret(SkWStream* buf, const T (&code)[N]) {
+        // Dropping exactly one T is enough: return is a single T (byte on x86, u32 on ARM) today.
+        buf->write(&code, sizeof(T) * (N-1));
+    }
+
 #if defined(__aarch64__)
     static constexpr int kStride = 4;
     static void set_ctx(SkWStream* buf, void* ctx) {
@@ -236,7 +243,7 @@
     static bool splice(SkWStream* buf, SkRasterPipeline::StockStage st) {
         switch (st) {
             default: return false;
-        #define CASE(st) case SkRasterPipeline::st: splice(buf, kSplice_##st); break
+        #define CASE(st) case SkRasterPipeline::st: splice_until_ret(buf, kSplice_##st); break
             CASE(clear);
             CASE(plus_);
             CASE(srcover);
@@ -310,7 +317,7 @@
                 }
             }
 
-            splice(&buf, kSplice_inc_x);
+            splice_until_ret(&buf, kSplice_inc_x);
             loop(&buf, loop_start);  // Loop back to handle more pixels if not done.
             after_loop(&buf);
             ret(&buf);  // We're done.
diff --git a/src/splicer/SkSplicer_generated.h b/src/splicer/SkSplicer_generated.h
index a7eb827..a179e1d 100644
--- a/src/splicer/SkSplicer_generated.h
+++ b/src/splicer/SkSplicer_generated.h
@@ -15,18 +15,21 @@
 
 static const unsigned int kSplice_inc_x[] = {
     0x91001000,                                 //  add           x0, x0, #0x4
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_clear[] = {
     0x6f00e400,                                 //  movi          v0.2d, #0x0
     0x6f00e401,                                 //  movi          v1.2d, #0x0
     0x6f00e402,                                 //  movi          v2.2d, #0x0
     0x6f00e403,                                 //  movi          v3.2d, #0x0
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_plus_[] = {
     0x4e24d400,                                 //  fadd          v0.4s, v0.4s, v4.4s
     0x4e25d421,                                 //  fadd          v1.4s, v1.4s, v5.4s
     0x4e26d442,                                 //  fadd          v2.4s, v2.4s, v6.4s
     0x4e27d463,                                 //  fadd          v3.4s, v3.4s, v7.4s
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_srcover[] = {
     0x4d40c870,                                 //  ld1r          {v16.4s}, [x3]
@@ -35,6 +38,7 @@
     0x4e25ce01,                                 //  fmla          v1.4s, v16.4s, v5.4s
     0x4e26ce02,                                 //  fmla          v2.4s, v16.4s, v6.4s
     0x4e27ce03,                                 //  fmla          v3.4s, v16.4s, v7.4s
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_dstover[] = {
     0x4d40c870,                                 //  ld1r          {v16.4s}, [x3]
@@ -43,6 +47,7 @@
     0x4e21ce05,                                 //  fmla          v5.4s, v16.4s, v1.4s
     0x4e22ce06,                                 //  fmla          v6.4s, v16.4s, v2.4s
     0x4e23ce07,                                 //  fmla          v7.4s, v16.4s, v3.4s
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_clamp_0[] = {
     0x6f00e410,                                 //  movi          v16.2d, #0x0
@@ -50,6 +55,7 @@
     0x4e30f421,                                 //  fmax          v1.4s, v1.4s, v16.4s
     0x4e30f442,                                 //  fmax          v2.4s, v2.4s, v16.4s
     0x4e30f463,                                 //  fmax          v3.4s, v3.4s, v16.4s
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_clamp_1[] = {
     0x4d40c870,                                 //  ld1r          {v16.4s}, [x3]
@@ -57,6 +63,7 @@
     0x4eb0f421,                                 //  fmin          v1.4s, v1.4s, v16.4s
     0x4eb0f442,                                 //  fmin          v2.4s, v2.4s, v16.4s
     0x4eb0f463,                                 //  fmin          v3.4s, v3.4s, v16.4s
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_clamp_a[] = {
     0x4d40c870,                                 //  ld1r          {v16.4s}, [x3]
@@ -64,6 +71,7 @@
     0x4ea3f400,                                 //  fmin          v0.4s, v0.4s, v3.4s
     0x4ea3f421,                                 //  fmin          v1.4s, v1.4s, v3.4s
     0x4ea3f442,                                 //  fmin          v2.4s, v2.4s, v3.4s
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_swap[] = {
     0x4ea31c70,                                 //  mov           v16.16b, v3.16b
@@ -78,23 +86,27 @@
     0x4eb21e45,                                 //  mov           v5.16b, v18.16b
     0x4eb11e26,                                 //  mov           v6.16b, v17.16b
     0x4eb01e07,                                 //  mov           v7.16b, v16.16b
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_move_src_dst[] = {
     0x4ea01c04,                                 //  mov           v4.16b, v0.16b
     0x4ea11c25,                                 //  mov           v5.16b, v1.16b
     0x4ea21c46,                                 //  mov           v6.16b, v2.16b
     0x4ea31c67,                                 //  mov           v7.16b, v3.16b
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_move_dst_src[] = {
     0x4ea41c80,                                 //  mov           v0.16b, v4.16b
     0x4ea51ca1,                                 //  mov           v1.16b, v5.16b
     0x4ea61cc2,                                 //  mov           v2.16b, v6.16b
     0x4ea71ce3,                                 //  mov           v3.16b, v7.16b
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_premul[] = {
     0x6e23dc00,                                 //  fmul          v0.4s, v0.4s, v3.4s
     0x6e23dc21,                                 //  fmul          v1.4s, v1.4s, v3.4s
     0x6e23dc42,                                 //  fmul          v2.4s, v2.4s, v3.4s
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_unpremul[] = {
     0x4d40c870,                                 //  ld1r          {v16.4s}, [x3]
@@ -104,6 +116,7 @@
     0x6e20de00,                                 //  fmul          v0.4s, v16.4s, v0.4s
     0x6e21de01,                                 //  fmul          v1.4s, v16.4s, v1.4s
     0x6e22de02,                                 //  fmul          v2.4s, v16.4s, v2.4s
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_from_srgb[] = {
     0x91005068,                                 //  add           x8, x3, #0x14
@@ -135,6 +148,7 @@
     0x6e761ea0,                                 //  bsl           v0.16b, v21.16b, v22.16b
     0x6e781e41,                                 //  bsl           v1.16b, v18.16b, v24.16b
     0x6e711e62,                                 //  bsl           v2.16b, v19.16b, v17.16b
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_to_srgb[] = {
     0x6ea1d810,                                 //  frsqrte       v16.4s, v0.4s
@@ -197,6 +211,7 @@
     0x6e701f40,                                 //  bsl           v0.16b, v26.16b, v16.16b
     0x6e721e61,                                 //  bsl           v1.16b, v19.16b, v18.16b
     0x6e741e22,                                 //  bsl           v2.16b, v17.16b, v20.16b
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_scale_u8[] = {
     0xf9400048,                                 //  ldr           x8, [x2]
@@ -218,6 +233,7 @@
     0x6e21de01,                                 //  fmul          v1.4s, v16.4s, v1.4s
     0x6e22de02,                                 //  fmul          v2.4s, v16.4s, v2.4s
     0x6e23de03,                                 //  fmul          v3.4s, v16.4s, v3.4s
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_load_tables[] = {
     0xa9402849,                                 //  ldp           x9, x10, [x2]
@@ -271,6 +287,7 @@
     0x4e21d863,                                 //  scvtf         v3.4s, v3.4s
     0x6e1c0622,                                 //  mov           v2.s[3], v17.s[0]
     0x4f909063,                                 //  fmul          v3.4s, v3.4s, v16.s[0]
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_load_8888[] = {
     0xf9400048,                                 //  ldr           x8, [x2]
@@ -293,6 +310,7 @@
     0x4f839021,                                 //  fmul          v1.4s, v1.4s, v3.s[0]
     0x4f839042,                                 //  fmul          v2.4s, v2.4s, v3.s[0]
     0x4f839243,                                 //  fmul          v3.4s, v18.4s, v3.s[0]
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_store_8888[] = {
     0xbd400470,                                 //  ldr           s16, [x3,#4]
@@ -313,6 +331,7 @@
     0x4f385610,                                 //  shl           v16.4s, v16.4s, #24
     0x4eb01e30,                                 //  orr           v16.16b, v17.16b, v16.16b
     0x3ca96910,                                 //  str           q16, [x8,x9]
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_load_f16[] = {
     0xf9400048,                                 //  ldr           x8, [x2]
@@ -322,6 +341,7 @@
     0x0e217a21,                                 //  fcvtl         v1.4s, v17.4h
     0x0e217a42,                                 //  fcvtl         v2.4s, v18.4h
     0x0e217a63,                                 //  fcvtl         v3.4s, v19.4h
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_store_f16[] = {
     0xf9400048,                                 //  ldr           x8, [x2]
@@ -331,6 +351,7 @@
     0x8b000d08,                                 //  add           x8, x8, x0, lsl #3
     0x0e216873,                                 //  fcvtn         v19.4h, v3.4s
     0x0c000510,                                 //  st4           {v16.4h-v19.4h}, [x8]
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_matrix_3x4[] = {
     0xaa0203e8,                                 //  mov           x8, x2
@@ -358,24 +379,28 @@
     0x4eb01e00,                                 //  mov           v0.16b, v16.16b
     0x4eb11e21,                                 //  mov           v1.16b, v17.16b
     0x4eb21e42,                                 //  mov           v2.16b, v18.16b
+    0xd65f03c0,                                 //  return        (synthetic)
 };
 
 #elif defined(__ARM_NEON__)
 
 static const unsigned int kSplice_inc_x[] = {
     0xe2800002,                                 //  add           r0, r0, #2
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_clear[] = {
     0xf2800010,                                 //  vmov.i32      d0, #0
     0xf2801010,                                 //  vmov.i32      d1, #0
     0xf2802010,                                 //  vmov.i32      d2, #0
     0xf2803010,                                 //  vmov.i32      d3, #0
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_plus_[] = {
     0xf2000d04,                                 //  vadd.f32      d0, d0, d4
     0xf2011d05,                                 //  vadd.f32      d1, d1, d5
     0xf2022d06,                                 //  vadd.f32      d2, d2, d6
     0xf2033d07,                                 //  vadd.f32      d3, d3, d7
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_srcover[] = {
     0xf4e30c9f,                                 //  vld1.32       {d16[]}, [r3 :32]
@@ -384,6 +409,7 @@
     0xf2051c30,                                 //  vfma.f32      d1, d5, d16
     0xf2062c30,                                 //  vfma.f32      d2, d6, d16
     0xf2073c30,                                 //  vfma.f32      d3, d7, d16
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_dstover[] = {
     0xf4e30c9f,                                 //  vld1.32       {d16[]}, [r3 :32]
@@ -392,6 +418,7 @@
     0xf2015c30,                                 //  vfma.f32      d5, d1, d16
     0xf2026c30,                                 //  vfma.f32      d6, d2, d16
     0xf2037c30,                                 //  vfma.f32      d7, d3, d16
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_clamp_0[] = {
     0xf2c00010,                                 //  vmov.i32      d16, #0
@@ -399,6 +426,7 @@
     0xf2011f20,                                 //  vmax.f32      d1, d1, d16
     0xf2022f20,                                 //  vmax.f32      d2, d2, d16
     0xf2033f20,                                 //  vmax.f32      d3, d3, d16
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_clamp_1[] = {
     0xf4e30c9f,                                 //  vld1.32       {d16[]}, [r3 :32]
@@ -406,6 +434,7 @@
     0xf2211f20,                                 //  vmin.f32      d1, d1, d16
     0xf2222f20,                                 //  vmin.f32      d2, d2, d16
     0xf2233f20,                                 //  vmin.f32      d3, d3, d16
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_clamp_a[] = {
     0xf4e30c9f,                                 //  vld1.32       {d16[]}, [r3 :32]
@@ -413,6 +442,7 @@
     0xf2200f03,                                 //  vmin.f32      d0, d0, d3
     0xf2211f03,                                 //  vmin.f32      d1, d1, d3
     0xf2222f03,                                 //  vmin.f32      d2, d2, d3
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_swap[] = {
     0xeef00b43,                                 //  vmov.f64      d16, d3
@@ -427,23 +457,27 @@
     0xeeb05b62,                                 //  vmov.f64      d5, d18
     0xeeb06b61,                                 //  vmov.f64      d6, d17
     0xeeb07b60,                                 //  vmov.f64      d7, d16
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_move_src_dst[] = {
     0xeeb04b40,                                 //  vmov.f64      d4, d0
     0xeeb05b41,                                 //  vmov.f64      d5, d1
     0xeeb06b42,                                 //  vmov.f64      d6, d2
     0xeeb07b43,                                 //  vmov.f64      d7, d3
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_move_dst_src[] = {
     0xeeb00b44,                                 //  vmov.f64      d0, d4
     0xeeb01b45,                                 //  vmov.f64      d1, d5
     0xeeb02b46,                                 //  vmov.f64      d2, d6
     0xeeb03b47,                                 //  vmov.f64      d3, d7
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_premul[] = {
     0xf3000d13,                                 //  vmul.f32      d0, d0, d3
     0xf3011d13,                                 //  vmul.f32      d1, d1, d3
     0xf3022d13,                                 //  vmul.f32      d2, d2, d3
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_unpremul[] = {
     0xed2d8b04,                                 //  vpush         {d8-d9}
@@ -457,6 +491,7 @@
     0xf3011d91,                                 //  vmul.f32      d1, d17, d1
     0xf3012d92,                                 //  vmul.f32      d2, d17, d2
     0xecbd8b04,                                 //  vpop          {d8-d9}
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_from_srgb[] = {
     0xed2d8b02,                                 //  vpush         {d8}
@@ -492,6 +527,7 @@
     0xf31611b2,                                 //  vbsl          d1, d22, d18
     0xf31921b8,                                 //  vbsl          d2, d25, d24
     0xecbd8b02,                                 //  vpop          {d8}
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_to_srgb[] = {
     0xed2d8b02,                                 //  vpush         {d8}
@@ -559,6 +595,7 @@
     0xf31111b3,                                 //  vbsl          d1, d17, d19
     0xf31221b4,                                 //  vbsl          d2, d18, d20
     0xecbd8b02,                                 //  vpop          {d8}
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_scale_u8[] = {
     0xed2d8b02,                                 //  vpush         {d8}
@@ -580,6 +617,7 @@
     0xf3003d93,                                 //  vmul.f32      d3, d16, d3
     0xe28dd008,                                 //  add           sp, sp, #8
     0xecbd8b02,                                 //  vpop          {d8}
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_load_tables[] = {
     0xe92d41f0,                                 //  push          {r4, r5, r6, r7, r8, lr}
@@ -619,6 +657,7 @@
     0xe0887105,                                 //  add           r7, r8, r5, lsl #2
     0xed972a00,                                 //  vldr          s4, [r7]
     0xe8bd41f0,                                 //  pop           {r4, r5, r6, r7, r8, lr}
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_load_8888[] = {
     0xe92d4800,                                 //  push          {fp, lr}
@@ -643,6 +682,7 @@
     0xf2a119c2,                                 //  vmul.f32      d1, d17, d2[0]
     0xf2a029c2,                                 //  vmul.f32      d2, d16, d2[0]
     0xe8bd4800,                                 //  pop           {fp, lr}
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_store_8888[] = {
     0xe283c004,                                 //  add           ip, r3, #4
@@ -668,6 +708,7 @@
     0xf26001b2,                                 //  vorr          d16, d16, d18
     0xf26001b1,                                 //  vorr          d16, d16, d17
     0xedcc0b00,                                 //  vstr          d16, [ip]
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_load_f16[] = {
     0xed2d8b04,                                 //  vpush         {d8-d9}
@@ -685,6 +726,7 @@
     0xeeb02b43,                                 //  vmov.f64      d2, d3
     0xeeb03b49,                                 //  vmov.f64      d3, d9
     0xecbd8b04,                                 //  vpop          {d8-d9}
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_store_f16[] = {
     0xeef00b41,                                 //  vmov.f64      d16, d1
@@ -698,6 +740,7 @@
     0xf3f60622,                                 //  vcvt.f16.f32  d16, q9
     0xe08cc180,                                 //  add           ip, ip, r0, lsl #3
     0xf44c084f,                                 //  vst2.16       {d16-d17}, [ip]
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 static const unsigned int kSplice_matrix_3x4[] = {
     0xe282c020,                                 //  add           ip, r2, #32
@@ -735,24 +778,28 @@
     0xf22101b1,                                 //  vorr          d0, d17, d17
     0xf22021b0,                                 //  vorr          d2, d16, d16
     0xf22211b2,                                 //  vorr          d1, d18, d18
+    0xe12fff1e,                                 //  return        (synthetic)
 };
 
 #else
 
 static const unsigned char kSplice_inc_x[] = {
     0x48,0x83,0xc7,0x08,                        //  add           $0x8,%rdi
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_clear[] = {
     0xc5,0xfc,0x57,0xc0,                        //  vxorps        %ymm0,%ymm0,%ymm0
     0xc5,0xf4,0x57,0xc9,                        //  vxorps        %ymm1,%ymm1,%ymm1
     0xc5,0xec,0x57,0xd2,                        //  vxorps        %ymm2,%ymm2,%ymm2
     0xc5,0xe4,0x57,0xdb,                        //  vxorps        %ymm3,%ymm3,%ymm3
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_plus_[] = {
     0xc5,0xfc,0x58,0xc4,                        //  vaddps        %ymm4,%ymm0,%ymm0
     0xc5,0xf4,0x58,0xcd,                        //  vaddps        %ymm5,%ymm1,%ymm1
     0xc5,0xec,0x58,0xd6,                        //  vaddps        %ymm6,%ymm2,%ymm2
     0xc5,0xe4,0x58,0xdf,                        //  vaddps        %ymm7,%ymm3,%ymm3
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_srcover[] = {
     0xc4,0x62,0x7d,0x18,0x01,                   //  vbroadcastss  (%rcx),%ymm8
@@ -761,6 +808,7 @@
     0xc4,0xc2,0x55,0xb8,0xc8,                   //  vfmadd231ps   %ymm8,%ymm5,%ymm1
     0xc4,0xc2,0x4d,0xb8,0xd0,                   //  vfmadd231ps   %ymm8,%ymm6,%ymm2
     0xc4,0xc2,0x45,0xb8,0xd8,                   //  vfmadd231ps   %ymm8,%ymm7,%ymm3
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_dstover[] = {
     0xc4,0x62,0x7d,0x18,0x01,                   //  vbroadcastss  (%rcx),%ymm8
@@ -769,6 +817,7 @@
     0xc4,0xc2,0x75,0xb8,0xe8,                   //  vfmadd231ps   %ymm8,%ymm1,%ymm5
     0xc4,0xc2,0x6d,0xb8,0xf0,                   //  vfmadd231ps   %ymm8,%ymm2,%ymm6
     0xc4,0xc2,0x65,0xb8,0xf8,                   //  vfmadd231ps   %ymm8,%ymm3,%ymm7
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_clamp_0[] = {
     0xc4,0x41,0x3c,0x57,0xc0,                   //  vxorps        %ymm8,%ymm8,%ymm8
@@ -776,6 +825,7 @@
     0xc4,0xc1,0x74,0x5f,0xc8,                   //  vmaxps        %ymm8,%ymm1,%ymm1
     0xc4,0xc1,0x6c,0x5f,0xd0,                   //  vmaxps        %ymm8,%ymm2,%ymm2
     0xc4,0xc1,0x64,0x5f,0xd8,                   //  vmaxps        %ymm8,%ymm3,%ymm3
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_clamp_1[] = {
     0xc4,0x62,0x7d,0x18,0x01,                   //  vbroadcastss  (%rcx),%ymm8
@@ -783,6 +833,7 @@
     0xc4,0xc1,0x74,0x5d,0xc8,                   //  vminps        %ymm8,%ymm1,%ymm1
     0xc4,0xc1,0x6c,0x5d,0xd0,                   //  vminps        %ymm8,%ymm2,%ymm2
     0xc4,0xc1,0x64,0x5d,0xd8,                   //  vminps        %ymm8,%ymm3,%ymm3
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_clamp_a[] = {
     0xc4,0x62,0x7d,0x18,0x01,                   //  vbroadcastss  (%rcx),%ymm8
@@ -790,6 +841,7 @@
     0xc5,0xfc,0x5d,0xc3,                        //  vminps        %ymm3,%ymm0,%ymm0
     0xc5,0xf4,0x5d,0xcb,                        //  vminps        %ymm3,%ymm1,%ymm1
     0xc5,0xec,0x5d,0xd3,                        //  vminps        %ymm3,%ymm2,%ymm2
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_swap[] = {
     0xc5,0x7c,0x28,0xc3,                        //  vmovaps       %ymm3,%ymm8
@@ -804,23 +856,27 @@
     0xc5,0x7c,0x29,0xd5,                        //  vmovaps       %ymm10,%ymm5
     0xc5,0x7c,0x29,0xce,                        //  vmovaps       %ymm9,%ymm6
     0xc5,0x7c,0x29,0xc7,                        //  vmovaps       %ymm8,%ymm7
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_move_src_dst[] = {
     0xc5,0xfc,0x28,0xe0,                        //  vmovaps       %ymm0,%ymm4
     0xc5,0xfc,0x28,0xe9,                        //  vmovaps       %ymm1,%ymm5
     0xc5,0xfc,0x28,0xf2,                        //  vmovaps       %ymm2,%ymm6
     0xc5,0xfc,0x28,0xfb,                        //  vmovaps       %ymm3,%ymm7
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_move_dst_src[] = {
     0xc5,0xfc,0x28,0xc4,                        //  vmovaps       %ymm4,%ymm0
     0xc5,0xfc,0x28,0xcd,                        //  vmovaps       %ymm5,%ymm1
     0xc5,0xfc,0x28,0xd6,                        //  vmovaps       %ymm6,%ymm2
     0xc5,0xfc,0x28,0xdf,                        //  vmovaps       %ymm7,%ymm3
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_premul[] = {
     0xc5,0xfc,0x59,0xc3,                        //  vmulps        %ymm3,%ymm0,%ymm0
     0xc5,0xf4,0x59,0xcb,                        //  vmulps        %ymm3,%ymm1,%ymm1
     0xc5,0xec,0x59,0xd3,                        //  vmulps        %ymm3,%ymm2,%ymm2
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_unpremul[] = {
     0xc4,0x41,0x3c,0x57,0xc0,                   //  vxorps        %ymm8,%ymm8,%ymm8
@@ -831,6 +887,7 @@
     0xc5,0xbc,0x59,0xc0,                        //  vmulps        %ymm0,%ymm8,%ymm0
     0xc5,0xbc,0x59,0xc9,                        //  vmulps        %ymm1,%ymm8,%ymm1
     0xc5,0xbc,0x59,0xd2,                        //  vmulps        %ymm2,%ymm8,%ymm2
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_from_srgb[] = {
     0xc4,0x62,0x7d,0x18,0x41,0x1c,              //  vbroadcastss  0x1c(%rcx),%ymm8
@@ -858,6 +915,7 @@
     0xc4,0x42,0x35,0xa8,0xde,                   //  vfmadd213ps   %ymm14,%ymm9,%ymm11
     0xc4,0xc1,0x6c,0xc2,0xd2,0x01,              //  vcmpltps      %ymm10,%ymm2,%ymm2
     0xc4,0xc3,0x25,0x4a,0xd0,0x20,              //  vblendvps     %ymm2,%ymm8,%ymm11,%ymm2
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_to_srgb[] = {
     0xc5,0x7c,0x52,0xc0,                        //  vrsqrtps      %ymm0,%ymm8
@@ -893,6 +951,7 @@
     0xc5,0x3c,0x59,0xc2,                        //  vmulps        %ymm2,%ymm8,%ymm8
     0xc4,0xc1,0x6c,0xc2,0xd2,0x01,              //  vcmpltps      %ymm10,%ymm2,%ymm2
     0xc4,0xc3,0x35,0x4a,0xd0,0x20,              //  vblendvps     %ymm2,%ymm8,%ymm9,%ymm2
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_scale_u8[] = {
     0x48,0x8b,0x02,                             //  mov           (%rdx),%rax
@@ -904,6 +963,7 @@
     0xc5,0xbc,0x59,0xc9,                        //  vmulps        %ymm1,%ymm8,%ymm1
     0xc5,0xbc,0x59,0xd2,                        //  vmulps        %ymm2,%ymm8,%ymm2
     0xc5,0xbc,0x59,0xdb,                        //  vmulps        %ymm3,%ymm8,%ymm3
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_load_tables[] = {
     0x48,0x8b,0x02,                             //  mov           (%rdx),%rax
@@ -928,6 +988,7 @@
     0xc5,0xfc,0x5b,0xdb,                        //  vcvtdq2ps     %ymm3,%ymm3
     0xc4,0x62,0x7d,0x18,0x41,0x08,              //  vbroadcastss  0x8(%rcx),%ymm8
     0xc4,0xc1,0x64,0x59,0xd8,                   //  vmulps        %ymm8,%ymm3,%ymm3
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_load_8888[] = {
     0x48,0x8b,0x02,                             //  mov           (%rdx),%rax
@@ -948,6 +1009,7 @@
     0xc5,0xe5,0x72,0xd3,0x18,                   //  vpsrld        $0x18,%ymm3,%ymm3
     0xc5,0xfc,0x5b,0xdb,                        //  vcvtdq2ps     %ymm3,%ymm3
     0xc4,0xc1,0x64,0x59,0xd8,                   //  vmulps        %ymm8,%ymm3,%ymm3
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_store_8888[] = {
     0x48,0x8b,0x02,                             //  mov           (%rdx),%rax
@@ -967,6 +1029,7 @@
     0xc4,0x41,0x2d,0xeb,0xc0,                   //  vpor          %ymm8,%ymm10,%ymm8
     0xc4,0x41,0x35,0xeb,0xc0,                   //  vpor          %ymm8,%ymm9,%ymm8
     0xc5,0x7e,0x7f,0x04,0xb8,                   //  vmovdqu       %ymm8,(%rax,%rdi,4)
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_load_f16[] = {
     0x48,0x8b,0x02,                             //  mov           (%rdx),%rax
@@ -990,6 +1053,7 @@
     0xc4,0xe2,0x7d,0x13,0xd2,                   //  vcvtph2ps     %xmm2,%ymm2
     0xc4,0xc1,0x39,0x6d,0xda,                   //  vpunpckhqdq   %xmm10,%xmm8,%xmm3
     0xc4,0xe2,0x7d,0x13,0xdb,                   //  vcvtph2ps     %xmm3,%ymm3
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_store_f16[] = {
     0x48,0x8b,0x02,                             //  mov           (%rdx),%rax
@@ -1009,6 +1073,7 @@
     0xc5,0x7a,0x7f,0x4c,0xf8,0x20,              //  vmovdqu       %xmm9,0x20(%rax,%rdi,8)
     0xc4,0x41,0x39,0x6a,0xc2,                   //  vpunpckhdq    %xmm10,%xmm8,%xmm8
     0xc5,0x7a,0x7f,0x44,0xf8,0x30,              //  vmovdqu       %xmm8,0x30(%rax,%rdi,8)
+    0xc3,                                       //  return        (synthetic)
 };
 static const unsigned char kSplice_matrix_3x4[] = {
     0xc4,0x62,0x7d,0x18,0x0a,                   //  vbroadcastss  (%rdx),%ymm9
@@ -1035,6 +1100,7 @@
     0xc5,0x7c,0x29,0xc0,                        //  vmovaps       %ymm8,%ymm0
     0xc5,0x7c,0x29,0xc9,                        //  vmovaps       %ymm9,%ymm1
     0xc5,0x7c,0x29,0xd2,                        //  vmovaps       %ymm10,%ymm2
+    0xc3,                                       //  return        (synthetic)
 };
 
 #endif
diff --git a/src/splicer/build_stages.py b/src/splicer/build_stages.py
index e301494..900b47f 100755
--- a/src/splicer/build_stages.py
+++ b/src/splicer/build_stages.py
@@ -40,7 +40,7 @@
                       ['-c', 'src/splicer/SkSplicer_stages.cpp'] +
                       ['-o', 'armv7.o'])
 
-def parse_object_file(dst, dot_o, array_type, done, target=None):
+def parse_object_file(dst, dot_o, array_type, jump, ret, target=None):
   cmd = [ objdump, '-d', dot_o]
   if target:
     cmd += ['--target', target]
@@ -68,13 +68,18 @@
     for arg in args:
       assert 'rip' not in arg  # TODO: detect on aarch64 too
 
-    if code == done:
-      print >>dst,'};'
-      continue
+    # At the end of every stage function there's a jump to next().
+    # We replace that with a ret to make these stages work with an interpreter.
+    if code == jump:
+      code = ret
+      inst = 'return'
+      args = '(synthetic)'
 
     hexed = ''.join('0x'+x+',' for x in code.split(' '))
     print >>dst,'    ' + hexed + ' '*(44-len(hexed)) + \
                 '//  ' + inst  + ' '*(14-len(inst))  + args
+    if code == ret:
+      print >>dst,'};'
 
 with open('src/splicer/SkSplicer_generated.h', 'w') as f:
   print >>f,'''/*
@@ -92,11 +97,11 @@
 
 #if defined(__aarch64__)
 '''
-  parse_object_file(f, 'aarch64.o', 'unsigned int', '14000000')
+  parse_object_file(f, 'aarch64.o', 'unsigned int', '14000000', 'd65f03c0')
   print >>f,'\n#elif defined(__ARM_NEON__)\n'
-  parse_object_file(f, 'armv7.o', 'unsigned int', 'eafffffe',
+  parse_object_file(f, 'armv7.o', 'unsigned int', 'eafffffe', 'e12fff1e',
                   target='elf32-littlearm')
   print >>f,'\n#else\n'
-  parse_object_file(f, 'hsw.o', 'unsigned char', 'e9 00 00 00 00')
+  parse_object_file(f, 'hsw.o', 'unsigned char', 'e9 00 00 00 00', 'c3')
   print >>f,'\n#endif\n'
   print >>f,'#endif//SkSplicer_generated_DEFINED'