Remove returns at end of stage splices.
Returns? Where we're going, we don't need returns.
I have discovered a truly marvelous alternative, a description of which
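this code review is too narrow to contain.
In short: build_stages.py now closes each generated stage array at the
stage's jump to next() instead of rewriting that jump into a return
instruction, so SkSplicer.cpp splices whole arrays with splice() and
splice_until_ret() is removed.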
Change-Id: I13fb36eb75771bc691d8187dddd876efcebc57d6
Reviewed-on: https://skia-review.googlesource.com/8480
Reviewed-by: Herb Derby <herb@google.com>
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Mike Klein <mtklein@chromium.org>
diff --git a/src/splicer/SkSplicer.cpp b/src/splicer/SkSplicer.cpp
index 71d4132..2fbab90 100644
--- a/src/splicer/SkSplicer.cpp
+++ b/src/splicer/SkSplicer.cpp
@@ -55,13 +55,6 @@
buf->write(&val, sizeof(val));
}
- // Splice up to (but not including) the final return instruction in code.
- template <typename T, size_t N>
- static void splice_until_ret(SkWStream* buf, const T (&code)[N]) {
- // On all platforms we splice today, return is a single T (byte on x86, u32 on ARM).
- buf->write(&code, sizeof(T) * (N-1));
- }
-
#if defined(__aarch64__)
static constexpr int kStride = 4;
static void set_ctx(SkWStream* buf, void* ctx) {
@@ -243,7 +236,7 @@
}
#endif
-#define CASE(prefix, st) case SkRasterPipeline::st: splice_until_ret(buf, prefix##_##st); break
+#define CASE(prefix, st) case SkRasterPipeline::st: splice(buf, prefix##_##st); break
#define DEFINE_SPLICE_STAGE(prefix) \
static bool prefix##_##splice_stage(SkWStream* buf, SkRasterPipeline::StockStage st) { \
switch (st) { \
@@ -302,14 +295,14 @@
#if defined(__aarch64__)
auto splice_stage = aarch64_splice_stage;
- auto inc_x = [](SkWStream* buf) { splice_until_ret(buf, aarch64_inc_x); };
+ auto inc_x = [](SkWStream* buf) { splice(buf, aarch64_inc_x); };
#elif defined(__ARM_NEON__)
// Late generation ARMv7, e.g. Cortex A15 or Krait.
if (!SkCpu::Supports(SkCpu::NEON|SkCpu::NEON_FMA|SkCpu::VFP_FP16)) {
return;
}
auto splice_stage = armv7_splice_stage;
- auto inc_x = [](SkWStream* buf) { splice_until_ret(buf, armv7_inc_x); };
+ auto inc_x = [](SkWStream* buf) { splice(buf, armv7_inc_x); };
#else
// To keep things simple, only x86-64 supported.
if (sizeof(void*) != 8) {
@@ -319,8 +312,8 @@
auto splice_stage = hsw ? hsw_splice_stage : sse2_splice_stage;
auto inc_x = [hsw](SkWStream* buf) {
- if (hsw) { splice_until_ret(buf, hsw_inc_x); }
- else { splice_until_ret(buf, sse2_inc_x); }
+ if (hsw) { splice(buf, hsw_inc_x); }
+ else { splice(buf, sse2_inc_x); }
};
auto ret = [hsw](SkWStream* buf) {
static const uint8_t vzeroupper[] = { 0xc5, 0xf8, 0x77 };
diff --git a/src/splicer/SkSplicer_generated.h b/src/splicer/SkSplicer_generated.h
index 0f823e6..cea6a55 100644
--- a/src/splicer/SkSplicer_generated.h
+++ b/src/splicer/SkSplicer_generated.h
@@ -13,7 +13,6 @@
static const unsigned int aarch64_inc_x[] = {
0x91001000, // add x0, x0, #0x4
- 0xd65f03c0, // return
};
static const unsigned int aarch64_seed_shader[] = {
0xaa0303e8, // mov x8, x3
@@ -32,7 +31,6 @@
0x4e27d421, // fadd v1.4s, v1.4s, v7.4s
0x6f00e406, // movi v6.2d, #0x0
0x6f00e407, // movi v7.2d, #0x0
- 0xd65f03c0, // return
};
static const unsigned int aarch64_constant_color[] = {
0x3dc00043, // ldr q3, [x2]
@@ -40,21 +38,18 @@
0x4e0c0461, // dup v1.4s, v3.s[1]
0x4e140462, // dup v2.4s, v3.s[2]
0x4e1c0463, // dup v3.4s, v3.s[3]
- 0xd65f03c0, // return
};
static const unsigned int aarch64_clear[] = {
0x6f00e400, // movi v0.2d, #0x0
0x6f00e401, // movi v1.2d, #0x0
0x6f00e402, // movi v2.2d, #0x0
0x6f00e403, // movi v3.2d, #0x0
- 0xd65f03c0, // return
};
static const unsigned int aarch64_plus_[] = {
0x4e24d400, // fadd v0.4s, v0.4s, v4.4s
0x4e25d421, // fadd v1.4s, v1.4s, v5.4s
0x4e26d442, // fadd v2.4s, v2.4s, v6.4s
0x4e27d463, // fadd v3.4s, v3.4s, v7.4s
- 0xd65f03c0, // return
};
static const unsigned int aarch64_srcover[] = {
0x4d40c870, // ld1r {v16.4s}, [x3]
@@ -63,7 +58,6 @@
0x4e25ce01, // fmla v1.4s, v16.4s, v5.4s
0x4e26ce02, // fmla v2.4s, v16.4s, v6.4s
0x4e27ce03, // fmla v3.4s, v16.4s, v7.4s
- 0xd65f03c0, // return
};
static const unsigned int aarch64_dstover[] = {
0x4d40c871, // ld1r {v17.4s}, [x3]
@@ -80,7 +74,6 @@
0x4eb11e21, // mov v1.16b, v17.16b
0x4eb21e42, // mov v2.16b, v18.16b
0x4eb31e63, // mov v3.16b, v19.16b
- 0xd65f03c0, // return
};
static const unsigned int aarch64_clamp_0[] = {
0x6f00e410, // movi v16.2d, #0x0
@@ -88,7 +81,6 @@
0x4e30f421, // fmax v1.4s, v1.4s, v16.4s
0x4e30f442, // fmax v2.4s, v2.4s, v16.4s
0x4e30f463, // fmax v3.4s, v3.4s, v16.4s
- 0xd65f03c0, // return
};
static const unsigned int aarch64_clamp_1[] = {
0x4d40c870, // ld1r {v16.4s}, [x3]
@@ -96,7 +88,6 @@
0x4eb0f421, // fmin v1.4s, v1.4s, v16.4s
0x4eb0f442, // fmin v2.4s, v2.4s, v16.4s
0x4eb0f463, // fmin v3.4s, v3.4s, v16.4s
- 0xd65f03c0, // return
};
static const unsigned int aarch64_clamp_a[] = {
0x4d40c870, // ld1r {v16.4s}, [x3]
@@ -104,7 +95,6 @@
0x4ea3f400, // fmin v0.4s, v0.4s, v3.4s
0x4ea3f421, // fmin v1.4s, v1.4s, v3.4s
0x4ea3f442, // fmin v2.4s, v2.4s, v3.4s
- 0xd65f03c0, // return
};
static const unsigned int aarch64_swap[] = {
0x4ea31c70, // mov v16.16b, v3.16b
@@ -119,27 +109,23 @@
0x4eb21e45, // mov v5.16b, v18.16b
0x4eb11e26, // mov v6.16b, v17.16b
0x4eb01e07, // mov v7.16b, v16.16b
- 0xd65f03c0, // return
};
static const unsigned int aarch64_move_src_dst[] = {
0x4ea01c04, // mov v4.16b, v0.16b
0x4ea11c25, // mov v5.16b, v1.16b
0x4ea21c46, // mov v6.16b, v2.16b
0x4ea31c67, // mov v7.16b, v3.16b
- 0xd65f03c0, // return
};
static const unsigned int aarch64_move_dst_src[] = {
0x4ea41c80, // mov v0.16b, v4.16b
0x4ea51ca1, // mov v1.16b, v5.16b
0x4ea61cc2, // mov v2.16b, v6.16b
0x4ea71ce3, // mov v3.16b, v7.16b
- 0xd65f03c0, // return
};
static const unsigned int aarch64_premul[] = {
0x6e23dc00, // fmul v0.4s, v0.4s, v3.4s
0x6e23dc21, // fmul v1.4s, v1.4s, v3.4s
0x6e23dc42, // fmul v2.4s, v2.4s, v3.4s
- 0xd65f03c0, // return
};
static const unsigned int aarch64_unpremul[] = {
0x4d40c870, // ld1r {v16.4s}, [x3]
@@ -149,7 +135,6 @@
0x6e20de00, // fmul v0.4s, v16.4s, v0.4s
0x6e21de01, // fmul v1.4s, v16.4s, v1.4s
0x6e22de02, // fmul v2.4s, v16.4s, v2.4s
- 0xd65f03c0, // return
};
static const unsigned int aarch64_from_srgb[] = {
0x9100e068, // add x8, x3, #0x38
@@ -181,7 +166,6 @@
0x6e761ea0, // bsl v0.16b, v21.16b, v22.16b
0x6e781e41, // bsl v1.16b, v18.16b, v24.16b
0x6e711e62, // bsl v2.16b, v19.16b, v17.16b
- 0xd65f03c0, // return
};
static const unsigned int aarch64_to_srgb[] = {
0x6ea1d810, // frsqrte v16.4s, v0.4s
@@ -244,7 +228,6 @@
0x6e701f40, // bsl v0.16b, v26.16b, v16.16b
0x6e721e61, // bsl v1.16b, v19.16b, v18.16b
0x6e741e22, // bsl v2.16b, v17.16b, v20.16b
- 0xd65f03c0, // return
};
static const unsigned int aarch64_scale_u8[] = {
0xf9400048, // ldr x8, [x2]
@@ -266,7 +249,6 @@
0x6e21de01, // fmul v1.4s, v16.4s, v1.4s
0x6e22de02, // fmul v2.4s, v16.4s, v2.4s
0x6e23de03, // fmul v3.4s, v16.4s, v3.4s
- 0xd65f03c0, // return
};
static const unsigned int aarch64_load_tables[] = {
0xa9402849, // ldp x9, x10, [x2]
@@ -320,7 +302,6 @@
0x4e21d863, // scvtf v3.4s, v3.4s
0x6e1c0622, // mov v2.s[3], v17.s[0]
0x4f909063, // fmul v3.4s, v3.4s, v16.s[0]
- 0xd65f03c0, // return
};
static const unsigned int aarch64_load_8888[] = {
0xf9400048, // ldr x8, [x2]
@@ -343,7 +324,6 @@
0x4f839021, // fmul v1.4s, v1.4s, v3.s[0]
0x4f839042, // fmul v2.4s, v2.4s, v3.s[0]
0x4f839243, // fmul v3.4s, v18.4s, v3.s[0]
- 0xd65f03c0, // return
};
static const unsigned int aarch64_store_8888[] = {
0xbd400870, // ldr s16, [x3,#8]
@@ -364,7 +344,6 @@
0x4f385610, // shl v16.4s, v16.4s, #24
0x4eb01e30, // orr v16.16b, v17.16b, v16.16b
0x3ca96910, // str q16, [x8,x9]
- 0xd65f03c0, // return
};
static const unsigned int aarch64_load_f16[] = {
0xf9400048, // ldr x8, [x2]
@@ -374,7 +353,6 @@
0x0e217a21, // fcvtl v1.4s, v17.4h
0x0e217a42, // fcvtl v2.4s, v18.4h
0x0e217a63, // fcvtl v3.4s, v19.4h
- 0xd65f03c0, // return
};
static const unsigned int aarch64_store_f16[] = {
0xf9400048, // ldr x8, [x2]
@@ -384,7 +362,6 @@
0x8b000d08, // add x8, x8, x0, lsl #3
0x0e216873, // fcvtn v19.4h, v3.4s
0x0c000510, // st4 {v16.4h-v19.4h}, [x8]
- 0xd65f03c0, // return
};
static const unsigned int aarch64_clamp_x[] = {
0x4d40c850, // ld1r {v16.4s}, [x2]
@@ -393,7 +370,6 @@
0x4eb18610, // add v16.4s, v16.4s, v17.4s
0x4eb0f400, // fmin v0.4s, v0.4s, v16.4s
0x4e20f640, // fmax v0.4s, v18.4s, v0.4s
- 0xd65f03c0, // return
};
static const unsigned int aarch64_clamp_y[] = {
0x4d40c850, // ld1r {v16.4s}, [x2]
@@ -402,7 +378,6 @@
0x4eb18610, // add v16.4s, v16.4s, v17.4s
0x4eb0f421, // fmin v1.4s, v1.4s, v16.4s
0x4e21f641, // fmax v1.4s, v18.4s, v1.4s
- 0xd65f03c0, // return
};
static const unsigned int aarch64_matrix_2x3[] = {
0xaa0203e8, // mov x8, x2
@@ -419,7 +394,6 @@
0x4f951011, // fmla v17.4s, v0.4s, v21.s[0]
0x4eb01e00, // mov v0.16b, v16.16b
0x4eb11e21, // mov v1.16b, v17.16b
- 0xd65f03c0, // return
};
static const unsigned int aarch64_matrix_3x4[] = {
0xaa0203e8, // mov x8, x2
@@ -447,7 +421,6 @@
0x4eb01e00, // mov v0.16b, v16.16b
0x4eb11e21, // mov v1.16b, v17.16b
0x4eb21e42, // mov v2.16b, v18.16b
- 0xd65f03c0, // return
};
static const unsigned int aarch64_linear_gradient_2stops[] = {
0xad404443, // ldp q3, q17, [x2]
@@ -460,11 +433,9 @@
0x4f911802, // fmla v2.4s, v0.4s, v17.s[2]
0x4fb11803, // fmla v3.4s, v0.4s, v17.s[3]
0x4eb01e00, // mov v0.16b, v16.16b
- 0xd65f03c0, // return
};
static const unsigned int armv7_inc_x[] = {
0xe2800002, // add r0, r0, #2
- 0xe12fff1e, // return
};
static const unsigned int armv7_seed_shader[] = {
0xee800b90, // vdup.32 d16, r0
@@ -483,7 +454,6 @@
0xf2806010, // vmov.i32 d6, #0
0xf2030da0, // vadd.f32 d0, d19, d16
0xf2807010, // vmov.i32 d7, #0
- 0xe12fff1e, // return
};
static const unsigned int armv7_constant_color[] = {
0xf4620a0f, // vld1.8 {d16-d17}, [r2]
@@ -491,21 +461,18 @@
0xf3bc1c20, // vdup.32 d1, d16[1]
0xf3b42c21, // vdup.32 d2, d17[0]
0xf3bc3c21, // vdup.32 d3, d17[1]
- 0xe12fff1e, // return
};
static const unsigned int armv7_clear[] = {
0xf2800010, // vmov.i32 d0, #0
0xf2801010, // vmov.i32 d1, #0
0xf2802010, // vmov.i32 d2, #0
0xf2803010, // vmov.i32 d3, #0
- 0xe12fff1e, // return
};
static const unsigned int armv7_plus_[] = {
0xf2000d04, // vadd.f32 d0, d0, d4
0xf2011d05, // vadd.f32 d1, d1, d5
0xf2022d06, // vadd.f32 d2, d2, d6
0xf2033d07, // vadd.f32 d3, d3, d7
- 0xe12fff1e, // return
};
static const unsigned int armv7_srcover[] = {
0xf4e30c9f, // vld1.32 {d16[]}, [r3 :32]
@@ -514,7 +481,6 @@
0xf2051c30, // vfma.f32 d1, d5, d16
0xf2062c30, // vfma.f32 d2, d6, d16
0xf2073c30, // vfma.f32 d3, d7, d16
- 0xe12fff1e, // return
};
static const unsigned int armv7_dstover[] = {
0xf4e30c9f, // vld1.32 {d16[]}, [r3 :32]
@@ -531,7 +497,6 @@
0xf22111b1, // vorr d1, d17, d17
0xf22221b2, // vorr d2, d18, d18
0xf22331b3, // vorr d3, d19, d19
- 0xe12fff1e, // return
};
static const unsigned int armv7_clamp_0[] = {
0xf2c00010, // vmov.i32 d16, #0
@@ -539,7 +504,6 @@
0xf2011f20, // vmax.f32 d1, d1, d16
0xf2022f20, // vmax.f32 d2, d2, d16
0xf2033f20, // vmax.f32 d3, d3, d16
- 0xe12fff1e, // return
};
static const unsigned int armv7_clamp_1[] = {
0xf4e30c9f, // vld1.32 {d16[]}, [r3 :32]
@@ -547,7 +511,6 @@
0xf2211f20, // vmin.f32 d1, d1, d16
0xf2222f20, // vmin.f32 d2, d2, d16
0xf2233f20, // vmin.f32 d3, d3, d16
- 0xe12fff1e, // return
};
static const unsigned int armv7_clamp_a[] = {
0xf4e30c9f, // vld1.32 {d16[]}, [r3 :32]
@@ -555,7 +518,6 @@
0xf2200f03, // vmin.f32 d0, d0, d3
0xf2211f03, // vmin.f32 d1, d1, d3
0xf2222f03, // vmin.f32 d2, d2, d3
- 0xe12fff1e, // return
};
static const unsigned int armv7_swap[] = {
0xeef00b43, // vmov.f64 d16, d3
@@ -570,27 +532,23 @@
0xeeb05b62, // vmov.f64 d5, d18
0xeeb06b61, // vmov.f64 d6, d17
0xeeb07b60, // vmov.f64 d7, d16
- 0xe12fff1e, // return
};
static const unsigned int armv7_move_src_dst[] = {
0xeeb04b40, // vmov.f64 d4, d0
0xeeb05b41, // vmov.f64 d5, d1
0xeeb06b42, // vmov.f64 d6, d2
0xeeb07b43, // vmov.f64 d7, d3
- 0xe12fff1e, // return
};
static const unsigned int armv7_move_dst_src[] = {
0xeeb00b44, // vmov.f64 d0, d4
0xeeb01b45, // vmov.f64 d1, d5
0xeeb02b46, // vmov.f64 d2, d6
0xeeb03b47, // vmov.f64 d3, d7
- 0xe12fff1e, // return
};
static const unsigned int armv7_premul[] = {
0xf3000d13, // vmul.f32 d0, d0, d3
0xf3011d13, // vmul.f32 d1, d1, d3
0xf3022d13, // vmul.f32 d2, d2, d3
- 0xe12fff1e, // return
};
static const unsigned int armv7_unpremul[] = {
0xed2d8b04, // vpush {d8-d9}
@@ -604,7 +562,6 @@
0xf3011d91, // vmul.f32 d1, d17, d1
0xf3012d92, // vmul.f32 d2, d17, d2
0xecbd8b04, // vpop {d8-d9}
- 0xe12fff1e, // return
};
static const unsigned int armv7_from_srgb[] = {
0xed2d8b02, // vpush {d8}
@@ -640,7 +597,6 @@
0xf31611b2, // vbsl d1, d22, d18
0xf31921b8, // vbsl d2, d25, d24
0xecbd8b02, // vpop {d8}
- 0xe12fff1e, // return
};
static const unsigned int armv7_to_srgb[] = {
0xed2d8b02, // vpush {d8}
@@ -708,7 +664,6 @@
0xf31111b3, // vbsl d1, d17, d19
0xf31221b4, // vbsl d2, d18, d20
0xecbd8b02, // vpop {d8}
- 0xe12fff1e, // return
};
static const unsigned int armv7_scale_u8[] = {
0xed2d8b02, // vpush {d8}
@@ -730,7 +685,6 @@
0xf3003d93, // vmul.f32 d3, d16, d3
0xe28dd008, // add sp, sp, #8
0xecbd8b02, // vpop {d8}
- 0xe12fff1e, // return
};
static const unsigned int armv7_load_tables[] = {
0xe92d41f0, // push {r4, r5, r6, r7, r8, lr}
@@ -770,7 +724,6 @@
0xe0887105, // add r7, r8, r5, lsl #2
0xed972a00, // vldr s4, [r7]
0xe8bd41f0, // pop {r4, r5, r6, r7, r8, lr}
- 0xe12fff1e, // return
};
static const unsigned int armv7_load_8888[] = {
0xe92d4800, // push {fp, lr}
@@ -795,7 +748,6 @@
0xf2a119c2, // vmul.f32 d1, d17, d2[0]
0xf2a029c2, // vmul.f32 d2, d16, d2[0]
0xe8bd4800, // pop {fp, lr}
- 0xe12fff1e, // return
};
static const unsigned int armv7_store_8888[] = {
0xe283c008, // add ip, r3, #8
@@ -821,7 +773,6 @@
0xf26001b2, // vorr d16, d16, d18
0xf26001b1, // vorr d16, d16, d17
0xedcc0b00, // vstr d16, [ip]
- 0xe12fff1e, // return
};
static const unsigned int armv7_load_f16[] = {
0xed2d8b04, // vpush {d8-d9}
@@ -839,7 +790,6 @@
0xeeb02b43, // vmov.f64 d2, d3
0xeeb03b49, // vmov.f64 d3, d9
0xecbd8b04, // vpop {d8-d9}
- 0xe12fff1e, // return
};
static const unsigned int armv7_store_f16[] = {
0xeef00b41, // vmov.f64 d16, d1
@@ -853,7 +803,6 @@
0xf3f60622, // vcvt.f16.f32 d16, q9
0xe08cc180, // add ip, ip, r0, lsl #3
0xf44c084f, // vst2.16 {d16-d17}, [ip]
- 0xe12fff1e, // return
};
static const unsigned int armv7_clamp_x[] = {
0xf3c70e1f, // vmov.i8 d16, #255
@@ -862,7 +811,6 @@
0xf2c01010, // vmov.i32 d17, #0
0xf2600f20, // vmin.f32 d16, d0, d16
0xf2010fa0, // vmax.f32 d0, d17, d16
- 0xe12fff1e, // return
};
static const unsigned int armv7_clamp_y[] = {
0xf3c70e1f, // vmov.i8 d16, #255
@@ -871,7 +819,6 @@
0xf2c01010, // vmov.i32 d17, #0
0xf2610f20, // vmin.f32 d16, d1, d16
0xf2011fa0, // vmax.f32 d1, d17, d16
- 0xe12fff1e, // return
};
static const unsigned int armv7_matrix_2x3[] = {
0xe282c00c, // add ip, r2, #12
@@ -891,7 +838,6 @@
0xf2401c33, // vfma.f32 d17, d0, d19
0xf22001b0, // vorr d0, d16, d16
0xf22111b1, // vorr d1, d17, d17
- 0xe12fff1e, // return
};
static const unsigned int armv7_matrix_3x4[] = {
0xe282c020, // add ip, r2, #32
@@ -929,7 +875,6 @@
0xf22101b1, // vorr d0, d17, d17
0xf22021b0, // vorr d2, d16, d16
0xf22211b2, // vorr d1, d18, d18
- 0xe12fff1e, // return
};
static const unsigned int armv7_linear_gradient_2stops[] = {
0xe1a0c002, // mov ip, r2
@@ -948,11 +893,9 @@
0xf3bc3c23, // vdup.32 d3, d19[1]
0xf2003c34, // vfma.f32 d3, d0, d20
0xf22001b0, // vorr d0, d16, d16
- 0xe12fff1e, // return
};
static const unsigned char sse2_inc_x[] = {
0x48,0x83,0xc7,0x04, // add $0x4,%rdi
- 0xc3, // return
};
static const unsigned char sse2_seed_shader[] = {
0x66,0x0f,0x6e,0xc7, // movd %edi,%xmm0
@@ -974,7 +917,6 @@
0x0f,0x57,0xed, // xorps %xmm5,%xmm5
0x0f,0x57,0xf6, // xorps %xmm6,%xmm6
0x0f,0x57,0xff, // xorps %xmm7,%xmm7
- 0xc3, // return
};
static const unsigned char sse2_constant_color[] = {
0x0f,0x10,0x1a, // movups (%rdx),%xmm3
@@ -985,21 +927,18 @@
0x0f,0x28,0xd3, // movaps %xmm3,%xmm2
0x0f,0xc6,0xd2,0xaa, // shufps $0xaa,%xmm2,%xmm2
0x0f,0xc6,0xdb,0xff, // shufps $0xff,%xmm3,%xmm3
- 0xc3, // return
};
static const unsigned char sse2_clear[] = {
0x0f,0x57,0xc0, // xorps %xmm0,%xmm0
0x0f,0x57,0xc9, // xorps %xmm1,%xmm1
0x0f,0x57,0xd2, // xorps %xmm2,%xmm2
0x0f,0x57,0xdb, // xorps %xmm3,%xmm3
- 0xc3, // return
};
static const unsigned char sse2_plus_[] = {
0x0f,0x58,0xc4, // addps %xmm4,%xmm0
0x0f,0x58,0xcd, // addps %xmm5,%xmm1
0x0f,0x58,0xd6, // addps %xmm6,%xmm2
0x0f,0x58,0xdf, // addps %xmm7,%xmm3
- 0xc3, // return
};
static const unsigned char sse2_srcover[] = {
0xf3,0x44,0x0f,0x10,0x01, // movss (%rcx),%xmm8
@@ -1016,7 +955,6 @@
0x41,0x0f,0x58,0xd1, // addps %xmm9,%xmm2
0x44,0x0f,0x59,0xc7, // mulps %xmm7,%xmm8
0x41,0x0f,0x58,0xd8, // addps %xmm8,%xmm3
- 0xc3, // return
};
static const unsigned char sse2_dstover[] = {
0xf3,0x44,0x0f,0x10,0x01, // movss (%rcx),%xmm8
@@ -1030,7 +968,6 @@
0x0f,0x58,0xd6, // addps %xmm6,%xmm2
0x41,0x0f,0x59,0xd8, // mulps %xmm8,%xmm3
0x0f,0x58,0xdf, // addps %xmm7,%xmm3
- 0xc3, // return
};
static const unsigned char sse2_clamp_0[] = {
0x45,0x0f,0x57,0xc0, // xorps %xmm8,%xmm8
@@ -1038,7 +975,6 @@
0x41,0x0f,0x5f,0xc8, // maxps %xmm8,%xmm1
0x41,0x0f,0x5f,0xd0, // maxps %xmm8,%xmm2
0x41,0x0f,0x5f,0xd8, // maxps %xmm8,%xmm3
- 0xc3, // return
};
static const unsigned char sse2_clamp_1[] = {
0xf3,0x44,0x0f,0x10,0x01, // movss (%rcx),%xmm8
@@ -1047,7 +983,6 @@
0x41,0x0f,0x5d,0xc8, // minps %xmm8,%xmm1
0x41,0x0f,0x5d,0xd0, // minps %xmm8,%xmm2
0x41,0x0f,0x5d,0xd8, // minps %xmm8,%xmm3
- 0xc3, // return
};
static const unsigned char sse2_clamp_a[] = {
0xf3,0x44,0x0f,0x10,0x01, // movss (%rcx),%xmm8
@@ -1056,7 +991,6 @@
0x0f,0x5d,0xc3, // minps %xmm3,%xmm0
0x0f,0x5d,0xcb, // minps %xmm3,%xmm1
0x0f,0x5d,0xd3, // minps %xmm3,%xmm2
- 0xc3, // return
};
static const unsigned char sse2_swap[] = {
0x44,0x0f,0x28,0xc3, // movaps %xmm3,%xmm8
@@ -1071,27 +1005,23 @@
0x41,0x0f,0x28,0xea, // movaps %xmm10,%xmm5
0x41,0x0f,0x28,0xf1, // movaps %xmm9,%xmm6
0x41,0x0f,0x28,0xf8, // movaps %xmm8,%xmm7
- 0xc3, // return
};
static const unsigned char sse2_move_src_dst[] = {
0x0f,0x28,0xe0, // movaps %xmm0,%xmm4
0x0f,0x28,0xe9, // movaps %xmm1,%xmm5
0x0f,0x28,0xf2, // movaps %xmm2,%xmm6
0x0f,0x28,0xfb, // movaps %xmm3,%xmm7
- 0xc3, // return
};
static const unsigned char sse2_move_dst_src[] = {
0x0f,0x28,0xc4, // movaps %xmm4,%xmm0
0x0f,0x28,0xcd, // movaps %xmm5,%xmm1
0x0f,0x28,0xd6, // movaps %xmm6,%xmm2
0x0f,0x28,0xdf, // movaps %xmm7,%xmm3
- 0xc3, // return
};
static const unsigned char sse2_premul[] = {
0x0f,0x59,0xc3, // mulps %xmm3,%xmm0
0x0f,0x59,0xcb, // mulps %xmm3,%xmm1
0x0f,0x59,0xd3, // mulps %xmm3,%xmm2
- 0xc3, // return
};
static const unsigned char sse2_unpremul[] = {
0x45,0x0f,0x57,0xc0, // xorps %xmm8,%xmm8
@@ -1103,7 +1033,6 @@
0x41,0x0f,0x59,0xc0, // mulps %xmm8,%xmm0
0x41,0x0f,0x59,0xc8, // mulps %xmm8,%xmm1
0x41,0x0f,0x59,0xd0, // mulps %xmm8,%xmm2
- 0xc3, // return
};
static const unsigned char sse2_from_srgb[] = {
0xf3,0x44,0x0f,0x10,0x41,0x40, // movss 0x40(%rcx),%xmm8
@@ -1153,7 +1082,6 @@
0x44,0x0f,0x54,0xc2, // andps %xmm2,%xmm8
0x41,0x0f,0x55,0xd1, // andnps %xmm9,%xmm2
0x41,0x0f,0x56,0xd0, // orps %xmm8,%xmm2
- 0xc3, // return
};
static const unsigned char sse2_to_srgb[] = {
0x48,0x83,0xec,0x28, // sub $0x28,%rsp
@@ -1223,7 +1151,6 @@
0x0f,0x28,0x34,0x24, // movaps (%rsp),%xmm6
0x0f,0x28,0x7c,0x24,0x10, // movaps 0x10(%rsp),%xmm7
0x48,0x83,0xc4,0x28, // add $0x28,%rsp
- 0xc3, // return
};
static const unsigned char sse2_scale_u8[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -1239,7 +1166,6 @@
0x41,0x0f,0x59,0xc9, // mulps %xmm9,%xmm1
0x41,0x0f,0x59,0xd1, // mulps %xmm9,%xmm2
0x41,0x0f,0x59,0xd9, // mulps %xmm9,%xmm3
- 0xc3, // return
};
static const unsigned char sse2_load_tables[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -1303,7 +1229,6 @@
0xf3,0x0f,0x10,0x59,0x0c, // movss 0xc(%rcx),%xmm3
0x0f,0xc6,0xdb,0x00, // shufps $0x0,%xmm3,%xmm3
0x41,0x0f,0x59,0xd8, // mulps %xmm8,%xmm3
- 0xc3, // return
};
static const unsigned char sse2_load_8888[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -1328,7 +1253,6 @@
0x66,0x0f,0x72,0xd3,0x18, // psrld $0x18,%xmm3
0x0f,0x5b,0xdb, // cvtdq2ps %xmm3,%xmm3
0x41,0x0f,0x59,0xd8, // mulps %xmm8,%xmm3
- 0xc3, // return
};
static const unsigned char sse2_store_8888[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -1352,7 +1276,6 @@
0x66,0x45,0x0f,0xeb,0xc1, // por %xmm9,%xmm8
0x66,0x45,0x0f,0xeb,0xc2, // por %xmm10,%xmm8
0xf3,0x44,0x0f,0x7f,0x04,0xb8, // movdqu %xmm8,(%rax,%rdi,4)
- 0xc3, // return
};
static const unsigned char sse2_load_f16[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -1381,7 +1304,6 @@
0x66,0x41,0x0f,0x69,0xd8, // punpckhwd %xmm8,%xmm3
0x66,0x0f,0x72,0xf3,0x0d, // pslld $0xd,%xmm3
0x41,0x0f,0x59,0xd9, // mulps %xmm9,%xmm3
- 0xc3, // return
};
static const unsigned char sse2_store_f16[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -1407,7 +1329,6 @@
0xf3,0x44,0x0f,0x7f,0x0c,0xf8, // movdqu %xmm9,(%rax,%rdi,8)
0x66,0x45,0x0f,0x6a,0xd0, // punpckhdq %xmm8,%xmm10
0xf3,0x44,0x0f,0x7f,0x54,0xf8,0x10, // movdqu %xmm10,0x10(%rax,%rdi,8)
- 0xc3, // return
};
static const unsigned char sse2_clamp_x[] = {
0xf3,0x44,0x0f,0x10,0x02, // movss (%rdx),%xmm8
@@ -1418,7 +1339,6 @@
0x45,0x0f,0x57,0xc0, // xorps %xmm8,%xmm8
0x44,0x0f,0x5f,0xc0, // maxps %xmm0,%xmm8
0x41,0x0f,0x28,0xc0, // movaps %xmm8,%xmm0
- 0xc3, // return
};
static const unsigned char sse2_clamp_y[] = {
0xf3,0x44,0x0f,0x10,0x02, // movss (%rdx),%xmm8
@@ -1429,7 +1349,6 @@
0x45,0x0f,0x57,0xc0, // xorps %xmm8,%xmm8
0x44,0x0f,0x5f,0xc1, // maxps %xmm1,%xmm8
0x41,0x0f,0x28,0xc8, // movaps %xmm8,%xmm1
- 0xc3, // return
};
static const unsigned char sse2_matrix_2x3[] = {
0x44,0x0f,0x28,0xc9, // movaps %xmm1,%xmm9
@@ -1454,7 +1373,6 @@
0x45,0x0f,0x58,0xd3, // addps %xmm11,%xmm10
0x41,0x0f,0x59,0xc8, // mulps %xmm8,%xmm1
0x41,0x0f,0x58,0xca, // addps %xmm10,%xmm1
- 0xc3, // return
};
static const unsigned char sse2_matrix_3x4[] = {
0x44,0x0f,0x28,0xc9, // movaps %xmm1,%xmm9
@@ -1502,7 +1420,6 @@
0x45,0x0f,0x59,0xd0, // mulps %xmm8,%xmm10
0x45,0x0f,0x58,0xd3, // addps %xmm11,%xmm10
0x41,0x0f,0x28,0xd2, // movaps %xmm10,%xmm2
- 0xc3, // return
};
static const unsigned char sse2_linear_gradient_2stops[] = {
0x44,0x0f,0x10,0x0a, // movups (%rdx),%xmm9
@@ -1530,11 +1447,9 @@
0x0f,0x59,0xd8, // mulps %xmm0,%xmm3
0x41,0x0f,0x58,0xd9, // addps %xmm9,%xmm3
0x41,0x0f,0x28,0xc0, // movaps %xmm8,%xmm0
- 0xc3, // return
};
static const unsigned char hsw_inc_x[] = {
0x48,0x83,0xc7,0x08, // add $0x8,%rdi
- 0xc3, // return
};
static const unsigned char hsw_seed_shader[] = {
0xc5,0xf9,0x6e,0xc7, // vmovd %edi,%xmm0
@@ -1552,28 +1467,24 @@
0xc5,0xd4,0x57,0xed, // vxorps %ymm5,%ymm5,%ymm5
0xc5,0xcc,0x57,0xf6, // vxorps %ymm6,%ymm6,%ymm6
0xc5,0xc4,0x57,0xff, // vxorps %ymm7,%ymm7,%ymm7
- 0xc3, // return
};
static const unsigned char hsw_constant_color[] = {
0xc4,0xe2,0x7d,0x18,0x02, // vbroadcastss (%rdx),%ymm0
0xc4,0xe2,0x7d,0x18,0x4a,0x04, // vbroadcastss 0x4(%rdx),%ymm1
0xc4,0xe2,0x7d,0x18,0x52,0x08, // vbroadcastss 0x8(%rdx),%ymm2
0xc4,0xe2,0x7d,0x18,0x5a,0x0c, // vbroadcastss 0xc(%rdx),%ymm3
- 0xc3, // return
};
static const unsigned char hsw_clear[] = {
0xc5,0xfc,0x57,0xc0, // vxorps %ymm0,%ymm0,%ymm0
0xc5,0xf4,0x57,0xc9, // vxorps %ymm1,%ymm1,%ymm1
0xc5,0xec,0x57,0xd2, // vxorps %ymm2,%ymm2,%ymm2
0xc5,0xe4,0x57,0xdb, // vxorps %ymm3,%ymm3,%ymm3
- 0xc3, // return
};
static const unsigned char hsw_plus_[] = {
0xc5,0xfc,0x58,0xc4, // vaddps %ymm4,%ymm0,%ymm0
0xc5,0xf4,0x58,0xcd, // vaddps %ymm5,%ymm1,%ymm1
0xc5,0xec,0x58,0xd6, // vaddps %ymm6,%ymm2,%ymm2
0xc5,0xe4,0x58,0xdf, // vaddps %ymm7,%ymm3,%ymm3
- 0xc3, // return
};
static const unsigned char hsw_srcover[] = {
0xc4,0x62,0x7d,0x18,0x01, // vbroadcastss (%rcx),%ymm8
@@ -1582,7 +1493,6 @@
0xc4,0xc2,0x55,0xb8,0xc8, // vfmadd231ps %ymm8,%ymm5,%ymm1
0xc4,0xc2,0x4d,0xb8,0xd0, // vfmadd231ps %ymm8,%ymm6,%ymm2
0xc4,0xc2,0x45,0xb8,0xd8, // vfmadd231ps %ymm8,%ymm7,%ymm3
- 0xc3, // return
};
static const unsigned char hsw_dstover[] = {
0xc4,0x62,0x7d,0x18,0x01, // vbroadcastss (%rcx),%ymm8
@@ -1591,7 +1501,6 @@
0xc4,0xe2,0x3d,0xa8,0xcd, // vfmadd213ps %ymm5,%ymm8,%ymm1
0xc4,0xe2,0x3d,0xa8,0xd6, // vfmadd213ps %ymm6,%ymm8,%ymm2
0xc4,0xe2,0x3d,0xa8,0xdf, // vfmadd213ps %ymm7,%ymm8,%ymm3
- 0xc3, // return
};
static const unsigned char hsw_clamp_0[] = {
0xc4,0x41,0x3c,0x57,0xc0, // vxorps %ymm8,%ymm8,%ymm8
@@ -1599,7 +1508,6 @@
0xc4,0xc1,0x74,0x5f,0xc8, // vmaxps %ymm8,%ymm1,%ymm1
0xc4,0xc1,0x6c,0x5f,0xd0, // vmaxps %ymm8,%ymm2,%ymm2
0xc4,0xc1,0x64,0x5f,0xd8, // vmaxps %ymm8,%ymm3,%ymm3
- 0xc3, // return
};
static const unsigned char hsw_clamp_1[] = {
0xc4,0x62,0x7d,0x18,0x01, // vbroadcastss (%rcx),%ymm8
@@ -1607,7 +1515,6 @@
0xc4,0xc1,0x74,0x5d,0xc8, // vminps %ymm8,%ymm1,%ymm1
0xc4,0xc1,0x6c,0x5d,0xd0, // vminps %ymm8,%ymm2,%ymm2
0xc4,0xc1,0x64,0x5d,0xd8, // vminps %ymm8,%ymm3,%ymm3
- 0xc3, // return
};
static const unsigned char hsw_clamp_a[] = {
0xc4,0x62,0x7d,0x18,0x01, // vbroadcastss (%rcx),%ymm8
@@ -1615,7 +1522,6 @@
0xc5,0xfc,0x5d,0xc3, // vminps %ymm3,%ymm0,%ymm0
0xc5,0xf4,0x5d,0xcb, // vminps %ymm3,%ymm1,%ymm1
0xc5,0xec,0x5d,0xd3, // vminps %ymm3,%ymm2,%ymm2
- 0xc3, // return
};
static const unsigned char hsw_swap[] = {
0xc5,0x7c,0x28,0xc3, // vmovaps %ymm3,%ymm8
@@ -1630,27 +1536,23 @@
0xc5,0x7c,0x29,0xd5, // vmovaps %ymm10,%ymm5
0xc5,0x7c,0x29,0xce, // vmovaps %ymm9,%ymm6
0xc5,0x7c,0x29,0xc7, // vmovaps %ymm8,%ymm7
- 0xc3, // return
};
static const unsigned char hsw_move_src_dst[] = {
0xc5,0xfc,0x28,0xe0, // vmovaps %ymm0,%ymm4
0xc5,0xfc,0x28,0xe9, // vmovaps %ymm1,%ymm5
0xc5,0xfc,0x28,0xf2, // vmovaps %ymm2,%ymm6
0xc5,0xfc,0x28,0xfb, // vmovaps %ymm3,%ymm7
- 0xc3, // return
};
static const unsigned char hsw_move_dst_src[] = {
0xc5,0xfc,0x28,0xc4, // vmovaps %ymm4,%ymm0
0xc5,0xfc,0x28,0xcd, // vmovaps %ymm5,%ymm1
0xc5,0xfc,0x28,0xd6, // vmovaps %ymm6,%ymm2
0xc5,0xfc,0x28,0xdf, // vmovaps %ymm7,%ymm3
- 0xc3, // return
};
static const unsigned char hsw_premul[] = {
0xc5,0xfc,0x59,0xc3, // vmulps %ymm3,%ymm0,%ymm0
0xc5,0xf4,0x59,0xcb, // vmulps %ymm3,%ymm1,%ymm1
0xc5,0xec,0x59,0xd3, // vmulps %ymm3,%ymm2,%ymm2
- 0xc3, // return
};
static const unsigned char hsw_unpremul[] = {
0xc4,0x41,0x3c,0x57,0xc0, // vxorps %ymm8,%ymm8,%ymm8
@@ -1661,7 +1563,6 @@
0xc5,0xbc,0x59,0xc0, // vmulps %ymm0,%ymm8,%ymm0
0xc5,0xbc,0x59,0xc9, // vmulps %ymm1,%ymm8,%ymm1
0xc5,0xbc,0x59,0xd2, // vmulps %ymm2,%ymm8,%ymm2
- 0xc3, // return
};
static const unsigned char hsw_from_srgb[] = {
0xc4,0x62,0x7d,0x18,0x41,0x40, // vbroadcastss 0x40(%rcx),%ymm8
@@ -1689,7 +1590,6 @@
0xc4,0x42,0x35,0xa8,0xde, // vfmadd213ps %ymm14,%ymm9,%ymm11
0xc4,0xc1,0x6c,0xc2,0xd2,0x01, // vcmpltps %ymm10,%ymm2,%ymm2
0xc4,0xc3,0x25,0x4a,0xd0,0x20, // vblendvps %ymm2,%ymm8,%ymm11,%ymm2
- 0xc3, // return
};
static const unsigned char hsw_to_srgb[] = {
0xc5,0x7c,0x52,0xc0, // vrsqrtps %ymm0,%ymm8
@@ -1725,7 +1625,6 @@
0xc5,0x3c,0x59,0xc2, // vmulps %ymm2,%ymm8,%ymm8
0xc4,0xc1,0x6c,0xc2,0xd2,0x01, // vcmpltps %ymm10,%ymm2,%ymm2
0xc4,0xc3,0x35,0x4a,0xd0,0x20, // vblendvps %ymm2,%ymm8,%ymm9,%ymm2
- 0xc3, // return
};
static const unsigned char hsw_scale_u8[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -1737,7 +1636,6 @@
0xc5,0xbc,0x59,0xc9, // vmulps %ymm1,%ymm8,%ymm1
0xc5,0xbc,0x59,0xd2, // vmulps %ymm2,%ymm8,%ymm2
0xc5,0xbc,0x59,0xdb, // vmulps %ymm3,%ymm8,%ymm3
- 0xc3, // return
};
static const unsigned char hsw_load_tables[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -1762,7 +1660,6 @@
0xc5,0xfc,0x5b,0xdb, // vcvtdq2ps %ymm3,%ymm3
0xc4,0x62,0x7d,0x18,0x41,0x0c, // vbroadcastss 0xc(%rcx),%ymm8
0xc4,0xc1,0x64,0x59,0xd8, // vmulps %ymm8,%ymm3,%ymm3
- 0xc3, // return
};
static const unsigned char hsw_load_8888[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -1783,7 +1680,6 @@
0xc5,0xe5,0x72,0xd3,0x18, // vpsrld $0x18,%ymm3,%ymm3
0xc5,0xfc,0x5b,0xdb, // vcvtdq2ps %ymm3,%ymm3
0xc4,0xc1,0x64,0x59,0xd8, // vmulps %ymm8,%ymm3,%ymm3
- 0xc3, // return
};
static const unsigned char hsw_store_8888[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -1803,7 +1699,6 @@
0xc4,0x41,0x2d,0xeb,0xc0, // vpor %ymm8,%ymm10,%ymm8
0xc4,0x41,0x35,0xeb,0xc0, // vpor %ymm8,%ymm9,%ymm8
0xc5,0x7e,0x7f,0x04,0xb8, // vmovdqu %ymm8,(%rax,%rdi,4)
- 0xc3, // return
};
static const unsigned char hsw_load_f16[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -1827,7 +1722,6 @@
0xc4,0xe2,0x7d,0x13,0xd2, // vcvtph2ps %xmm2,%ymm2
0xc4,0xc1,0x39,0x6d,0xda, // vpunpckhqdq %xmm10,%xmm8,%xmm3
0xc4,0xe2,0x7d,0x13,0xdb, // vcvtph2ps %xmm3,%ymm3
- 0xc3, // return
};
static const unsigned char hsw_store_f16[] = {
0x48,0x8b,0x02, // mov (%rdx),%rax
@@ -1847,7 +1741,6 @@
0xc5,0x7a,0x7f,0x4c,0xf8,0x20, // vmovdqu %xmm9,0x20(%rax,%rdi,8)
0xc4,0x41,0x39,0x6a,0xc2, // vpunpckhdq %xmm10,%xmm8,%xmm8
0xc5,0x7a,0x7f,0x44,0xf8,0x30, // vmovdqu %xmm8,0x30(%rax,%rdi,8)
- 0xc3, // return
};
static const unsigned char hsw_clamp_x[] = {
0xc4,0x62,0x7d,0x58,0x02, // vpbroadcastd (%rdx),%ymm8
@@ -1856,7 +1749,6 @@
0xc4,0xc1,0x7c,0x5d,0xc0, // vminps %ymm8,%ymm0,%ymm0
0xc4,0x41,0x3c,0x57,0xc0, // vxorps %ymm8,%ymm8,%ymm8
0xc5,0xbc,0x5f,0xc0, // vmaxps %ymm0,%ymm8,%ymm0
- 0xc3, // return
};
static const unsigned char hsw_clamp_y[] = {
0xc4,0x62,0x7d,0x58,0x02, // vpbroadcastd (%rdx),%ymm8
@@ -1865,7 +1757,6 @@
0xc4,0xc1,0x74,0x5d,0xc8, // vminps %ymm8,%ymm1,%ymm1
0xc4,0x41,0x3c,0x57,0xc0, // vxorps %ymm8,%ymm8,%ymm8
0xc5,0xbc,0x5f,0xc9, // vmaxps %ymm1,%ymm8,%ymm1
- 0xc3, // return
};
static const unsigned char hsw_matrix_2x3[] = {
0xc4,0x62,0x7d,0x18,0x0a, // vbroadcastss (%rdx),%ymm9
@@ -1880,7 +1771,6 @@
0xc4,0x42,0x7d,0xb8,0xca, // vfmadd231ps %ymm10,%ymm0,%ymm9
0xc5,0x7c,0x29,0xc0, // vmovaps %ymm8,%ymm0
0xc5,0x7c,0x29,0xc9, // vmovaps %ymm9,%ymm1
- 0xc3, // return
};
static const unsigned char hsw_matrix_3x4[] = {
0xc4,0x62,0x7d,0x18,0x0a, // vbroadcastss (%rdx),%ymm9
@@ -1907,7 +1797,6 @@
0xc5,0x7c,0x29,0xc0, // vmovaps %ymm8,%ymm0
0xc5,0x7c,0x29,0xc9, // vmovaps %ymm9,%ymm1
0xc5,0x7c,0x29,0xd2, // vmovaps %ymm10,%ymm2
- 0xc3, // return
};
static const unsigned char hsw_linear_gradient_2stops[] = {
0xc4,0xe2,0x7d,0x18,0x4a,0x10, // vbroadcastss 0x10(%rdx),%ymm1
@@ -1923,6 +1812,5 @@
0xc4,0xe2,0x7d,0x18,0x5a,0x0c, // vbroadcastss 0xc(%rdx),%ymm3
0xc4,0xc2,0x7d,0xb8,0xd9, // vfmadd231ps %ymm9,%ymm0,%ymm3
0xc5,0x7c,0x29,0xc0, // vmovaps %ymm8,%ymm0
- 0xc3, // return
};
#endif//SkSplicer_generated_DEFINED
diff --git a/src/splicer/build_stages.py b/src/splicer/build_stages.py
index 21d94d8..a6f55c3 100755
--- a/src/splicer/build_stages.py
+++ b/src/splicer/build_stages.py
@@ -53,7 +53,7 @@
['-c', 'src/splicer/SkSplicer_stages.cpp'] +
['-o', 'armv7.o'])
-def parse_object_file(dot_o, array_type, jump, ret, target=None):
+def parse_object_file(dot_o, array_type, jump, target=None):
prefix = dot_o.replace('.o', '_')
cmd = [ objdump, '-d', '--insn-width=8', dot_o]
if target:
@@ -83,17 +83,14 @@
assert 'rip' not in arg # TODO: detect on aarch64 too
# At the end of every stage function there's a jump to next().
- # We replace that with a ret to make these stages work with an interpreter.
+ # This marks the splice point.
if code == jump:
- code = ret
- inst = 'return'
- args = ''
+ print '};'
+ continue
hexed = ''.join('0x'+x+',' for x in code.split(' '))
print ' ' + hexed + ' '*(44-len(hexed)) + \
'// ' + inst + (' '*(14-len(inst)) + args if args else '')
- if code == ret:
- print '};'
print '''/*
* Copyright 2017 Google Inc.
@@ -108,10 +105,10 @@
// This file is generated semi-automatically with this command:
// $ src/splicer/build_stages.py
'''
-parse_object_file('aarch64.o', 'unsigned int', '14000000', 'd65f03c0')
-parse_object_file( 'armv7.o', 'unsigned int', 'eafffffe', 'e12fff1e',
+parse_object_file('aarch64.o', 'unsigned int', '14000000')
+parse_object_file( 'armv7.o', 'unsigned int', 'eafffffe',
target='elf32-littlearm')
-parse_object_file( 'sse2.o', 'unsigned char', 'e9 00 00 00 00', 'c3')
-#parse_object_file('sse41.o', 'unsigned char', 'e9 00 00 00 00', 'c3')
-parse_object_file( 'hsw.o', 'unsigned char', 'e9 00 00 00 00', 'c3')
+parse_object_file( 'sse2.o', 'unsigned char', 'e9 00 00 00 00')
+#parse_object_file('sse41.o', 'unsigned char', 'e9 00 00 00 00')
+parse_object_file( 'hsw.o', 'unsigned char', 'e9 00 00 00 00')
print '#endif//SkSplicer_generated_DEFINED'